Introduction

Candida glabrata strains:

  1. CBS138
  2. BG2

Treated with voriconazole (VCZ / VORI), fluconazole (FCZ / FLUC) or control with DMSO (solvent-only control, abbreviated CT).

Sequencing FASTQ files were aligned to appropriate genomes using HISAT2 and quantified using featureCounts.

MultiQC results, including quality control of raw reads (FastQC) and alignment are here:

  • CBS138: ../data/multiQC/Candida_glabrata_CBS138/multiqc_report.html)
  • BG2: ../data/multiQC/Candida_glabrata_BG2/multiqc_report.html)

featureCount results, aggregated by strain, are below:

counts_all_CBS138 = read.table("../data/featureCounts/Candida_glabrata_CBS138/CBS138_all_conditions_featureCounts.tab", header=TRUE)
counts_all_BG2 = read.table("../data/featureCounts/Candida_glabrata_BG2/BG2_all_conditions_featureCounts.tab", header=TRUE)

counts_rpm_CBS138 = read.table("../data/featureCounts/Candida_glabrata_CBS138/CBS138_all_conditions_rpm.tab", header=TRUE)
counts_rpm_BG2 = read.table("../data/featureCounts/Candida_glabrata_BG2/BG2_all_conditions_rpm.tab", header=TRUE)

head(counts_all_CBS138) # CBS138, raw
##         Geneid CBS138_RPMI_DMSO_A CBS138_RPMI_VORI_A CBS138_RPMI_FLUC_A
## 0 CAGL0A00165g                329               1277                953
## 1 CAGL0A00187g                541               1561               1755
## 2 CAGL0A00209g               2460               6237               7252
## 3 CAGL0A00231g                243                172                375
## 4 CAGL0A00253g                791               2009               2340
## 5 CAGL0A00275g               1295               2665               2876
##   CBS138_RPMI_DMSO_B CBS138_RPMI_VORI_B CBS138_RPMI_FLUC_B CBS138_RPMI_DMSO_C
## 0                653                873               1124                422
## 1               1240               1434               1990                950
## 2               5037               6009               7581               3992
## 3                292                305                235                426
## 4               1336               2031               2071               1217
## 5               2133               2716               2933               1643
##   CBS138_RPMI_VORI_C CBS138_RPMI_FLUC_C
## 0                436               3425
## 1               1120               7031
## 2               1345              18963
## 3                118               1572
## 4                395               8426
## 5                488               8892
head(counts_all_BG2) # BG2, raw
##         Geneid BG2_RPMI_DMSO_A BG2_RPMI_VORI_A BG2_RPMI_FLUC_A BG2_RPMI_DMSO_B
## 0 GWK60_A00011               1               1               1               1
## 1 GWK60_A00033             100             169              93             116
## 2 GWK60_A00055            1490            2411            1982            1417
## 3 GWK60_A00077            4781            8038            6834            5852
## 4 GWK60_A00099           14463           20284           15228           15459
## 5 GWK60_A00121            2371            1412             816            2188
##   BG2_RPMI_VORI_B BG2_RPMI_FLUC_B BG2_RPMI_DMSO_C BG2_RPMI_VORI_C
## 0               1               3               1               1
## 1             194              17             123              19
## 2            2422             304            1838             223
## 3            7534            1569            5704            1015
## 4           18965            2859           17357            1804
## 5             979             136            3000              63
##   BG2_RPMI_FLUC_C
## 0               1
## 1              26
## 2             394
## 3            1236
## 4            2528
## 5             146

Results normalised to RPM are below:

head(counts_rpm_CBS138) # CBS138, normalised to RPM
##         Geneid CBS138_RPMI_DMSO_A CBS138_RPMI_VORI_A CBS138_RPMI_FLUC_A
## 1 CAGL0A00165g           30.85212           65.75689           46.30258
## 2 CAGL0A00187g           50.73251           80.38097           85.26866
## 3 CAGL0A00209g          230.68756          321.16343          352.34663
## 4 CAGL0A00231g           22.78743            8.85684           18.21980
## 5 CAGL0A00253g           74.17637          103.44995          113.69155
## 6 CAGL0A00275g          121.43918          137.22953          139.73371
##   CBS138_RPMI_DMSO_B CBS138_RPMI_VORI_B CBS138_RPMI_FLUC_B CBS138_RPMI_DMSO_C
## 1           38.03028           47.18167           51.79789           32.91856
## 2           72.21676           77.50116           91.70624           74.10577
## 3          293.35146          324.75904          349.35928          311.40026
## 4           17.00588           16.48386           10.82963           33.23059
## 5           77.80773          109.76629           95.43900           94.93340
## 6          124.22447          146.78741          135.16301          128.16398
##   CBS138_RPMI_VORI_C CBS138_RPMI_FLUC_C
## 1           49.17631           46.07813
## 2          126.32448           94.59134
## 3          151.70216          255.11813
## 4           13.30919           21.14885
## 5           44.55194          113.35893
## 6           55.04138          119.62824
head(counts_rpm_BG2) # BG2, normalised to RPM
##         Geneid BG2_RPMI_DMSO_A BG2_RPMI_VORI_A BG2_RPMI_FLUC_A BG2_RPMI_DMSO_B
## 1 GWK60_A00011      0.01411581      0.01238536      0.01580123      0.01571183
## 2 GWK60_A00033      1.41158064      2.09312569      1.46951436      1.82257193
## 3 GWK60_A00055     21.03255147     29.86110080     31.31803713     22.26365885
## 4 GWK60_A00077     67.48767020     99.55351647    107.98560331     91.94561156
## 5 GWK60_A00099    204.15690735    251.22462406    240.62112485    242.88913348
## 6 GWK60_A00121     33.46857687     17.48812705     12.89380338     34.37747746
##   BG2_RPMI_VORI_B BG2_RPMI_FLUC_B BG2_RPMI_DMSO_C BG2_RPMI_VORI_C
## 1       0.0130404       0.2616564      0.01394962       0.1205643
## 2       2.5298376       1.4827197      1.71580380       2.2907212
## 3      31.5838485      26.5145175     25.63940963      26.8858332
## 4      98.2463727     136.8463092     79.56865753     122.3727387
## 5     247.3111836     249.3585711    242.12363055     217.4979513
## 6      12.7665515      11.8617578     41.84887317       7.5955493
##   BG2_RPMI_FLUC_C
## 1      0.09884977
## 2      2.57009413
## 3     38.94681101
## 4    122.17832084
## 5    249.89222903
## 6     14.43206702

Match genes across strains using Reciprocal Blast Best Hits

The genes between the two strains are matched using Reciprocal Blast Best Hits, which might not be entirely accurate given copy number variations etc. between the two species. I have used orthofinder https://doi.org/10.1186/s13059-019-1832-y to find orthologs, but I haven’t found a way to incorporate this into the analysis yet.

# Load reciprocal BLAST best hits
rbbh = read.csv("../data/rbbh/cbs138_bg2_rbbh2.csv")

# merge on CBS138 gene names,
counts_all_CBS138_rbbh = merge(counts_all_CBS138, rbbh, 
                               by.x="Geneid", by.y="gene_id_cbs138")

# combine counts from common genes.
counts_all_joined = merge(counts_all_CBS138_rbbh, 
                          dplyr::rename(counts_all_BG2, gene_id_bg2 = "Geneid"), 
                          by = "gene_id_bg2")
head(counts_all_joined)
##    gene_id_bg2       Geneid CBS138_RPMI_DMSO_A CBS138_RPMI_VORI_A
## 1 GWK60_A00011 CAGL0H10626g                119                153
## 2 GWK60_A00055 CAGL0A00165g                329               1277
## 3 GWK60_A00077 CAGL0A00187g                541               1561
## 4 GWK60_A00099 CAGL0A00209g               2460               6237
## 5 GWK60_A00121 CAGL0A00231g                243                172
## 6 GWK60_A00143 CAGL0A00253g                791               2009
##   CBS138_RPMI_FLUC_A CBS138_RPMI_DMSO_B CBS138_RPMI_VORI_B CBS138_RPMI_FLUC_B
## 1                371                218                354                317
## 2                953                653                873               1124
## 3               1755               1240               1434               1990
## 4               7252               5037               6009               7581
## 5                375                292                305                235
## 6               2340               1336               2031               2071
##   CBS138_RPMI_DMSO_C CBS138_RPMI_VORI_C CBS138_RPMI_FLUC_C BG2_RPMI_DMSO_A
## 1                238                205               2307               1
## 2                422                436               3425            1490
## 3                950               1120               7031            4781
## 4               3992               1345              18963           14463
## 5                426                118               1572            2371
## 6               1217                395               8426            5403
##   BG2_RPMI_VORI_A BG2_RPMI_FLUC_A BG2_RPMI_DMSO_B BG2_RPMI_VORI_B
## 1               1               1               1               1
## 2            2411            1982            1417            2422
## 3            8038            6834            5852            7534
## 4           20284           15228           15459           18965
## 5            1412             816            2188             979
## 6            8274            5540            4938            6905
##   BG2_RPMI_FLUC_B BG2_RPMI_DMSO_C BG2_RPMI_VORI_C BG2_RPMI_FLUC_C
## 1               3               1               1               1
## 2             304            1838             223             394
## 3            1569            5704            1015            1236
## 4            2859           17357            1804            2528
## 5             136            3000              63             146
## 6             984            5687             608            1004

Analysis

Quality control - sample correlation

For quality control, we check correlation between the samples (only within strain).

There is high correlation within control (DMSO) samples and treated (FCZ and VCZ), but no obvious differences between VCZ and FCZ.

This shows that the replicates are generally very well correlated, R > 0.96. However, there is more variability in replicate C, especially CBS138_RPMI_VORI_C.

Quality control - clustering

We intially cluster the results on raw rpm, separately by strain. Again untreated samples cluster together and treated samples cluster together. Treated samples cluster in a way that doesn’t indicate any obvious sample mix-up.

CBS138_RPMI_VORI_C is an outlier.

Calculate rlog

Regularized logarithm, on both strains together joined by best-hit genes.

sample_sheet_all = here::here("data", 
                         "sample_sheets", 
                         "DGE_samplesheet_canGla2.txt") %>%
  read_table()
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   SampleID = col_double(),
##   Code = col_character(),
##   Medium = col_character(),
##   Temp = col_character(),
##   Time = col_character(),
##   Rep = col_character(),
##   Species = col_character(),
##   Strain = col_character(),
##   Condition = col_character()
## )
# samplesheet = py$experiment_data_both
rownames(sample_sheet_all) = sample_sheet_all$Code
## Warning: Setting row names on a tibble is deprecated.
print(sample_sheet_all)
## # A tibble: 18 × 9
##    SampleID Code               Medium Temp  Time  Rep   Species Strain Condition
##  *    <dbl> <chr>              <chr>  <chr> <chr> <chr> <chr>   <chr>  <chr>    
##  1       86 CBS138_RPMI_DMSO_A RPMI_… 37C   4h    A     Candid… CBS138 CBS138_R…
##  2       87 CBS138_RPMI_VORI_A RPMI_… 37C   4h    A     Candid… CBS138 CBS138_R…
##  3       88 CBS138_RPMI_FLUC_A RPMI_… 37C   4h    A     Candid… CBS138 CBS138_R…
##  4       89 BG2_RPMI_DMSO_A    RPMI_… 37C   4h    A     Candid… BG2    BG2_RPMI…
##  5       90 BG2_RPMI_VORI_A    RPMI_… 37C   4h    A     Candid… BG2    BG2_RPMI…
##  6       91 BG2_RPMI_FLUC_A    RPMI_… 37C   4h    A     Candid… BG2    BG2_RPMI…
##  7       92 CBS138_RPMI_DMSO_B RPMI_… 37C   4h    B     Candid… CBS138 CBS138_R…
##  8       93 CBS138_RPMI_VORI_B RPMI_… 37C   4h    B     Candid… CBS138 CBS138_R…
##  9       94 CBS138_RPMI_FLUC_B RPMI_… 37C   4h    B     Candid… CBS138 CBS138_R…
## 10       98 BG2_RPMI_DMSO_B    RPMI_… 37C   4h    B     Candid… BG2    BG2_RPMI…
## 11       99 BG2_RPMI_VORI_B    RPMI_… 37C   4h    B     Candid… BG2    BG2_RPMI…
## 12      100 BG2_RPMI_FLUC_B    RPMI_… 37C   4h    B     Candid… BG2    BG2_RPMI…
## 13       95 CBS138_RPMI_DMSO_C RPMI_… 37C   4h    C     Candid… CBS138 CBS138_R…
## 14       96 CBS138_RPMI_VORI_C RPMI_… 37C   4h    C     Candid… CBS138 CBS138_R…
## 15       97 CBS138_RPMI_FLUC_C RPMI_… 37C   4h    C     Candid… CBS138 CBS138_R…
## 16      101 BG2_RPMI_DMSO_C    RPMI_… 37C   4h    C     Candid… BG2    BG2_RPMI…
## 17      102 BG2_RPMI_VORI_C    RPMI_… 37C   4h    C     Candid… BG2    BG2_RPMI…
## 18      103 BG2_RPMI_FLUC_C    RPMI_… 37C   4h    C     Candid… BG2    BG2_RPMI…
dds_all <- DESeqDataSetFromMatrix(countData =
                                    counts_all_joined %>%
                             dplyr::select(sample_sheet_all$Code) %>%
                             magrittr::set_rownames(counts_all_joined$Gene),
                                  colData = sample_sheet_all,
                                  design = ~ Code)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
rlog_all <- rlog(dds_all)
head(assay(rlog_all))
##              CBS138_RPMI_DMSO_A CBS138_RPMI_VORI_A CBS138_RPMI_FLUC_A
## CAGL0H10626g           7.532662           7.046592           7.803483
## CAGL0A00165g           9.819161          10.247470           9.981287
## CAGL0A00187g          10.850525          11.007853          11.079555
## CAGL0A00209g          12.708113          12.783775          12.891339
## CAGL0A00231g           9.134426           8.283834           8.798710
## CAGL0A00253g          11.102017          11.177522          11.284519
##              BG2_RPMI_DMSO_A BG2_RPMI_VORI_A BG2_RPMI_FLUC_A CBS138_RPMI_DMSO_B
## CAGL0H10626g        5.262604        5.237443        5.253129           7.578427
## CAGL0A00165g        9.575449        9.699059        9.706994           9.911436
## CAGL0A00187g       11.105937       11.269220       11.305368          11.037381
## CAGL0A00209g       12.660509       12.678386       12.613487          12.830194
## CAGL0A00231g        9.522984        8.829991        8.578657           8.830633
## CAGL0A00253g       11.176013       11.266810       11.107186          11.065763
##              CBS138_RPMI_VORI_B CBS138_RPMI_FLUC_B BG2_RPMI_DMSO_B
## CAGL0H10626g           7.866795           7.604660        5.264334
## CAGL0A00165g          10.004430          10.069135        9.552677
## CAGL0A00187g          11.013891          11.131689       11.283900
## CAGL0A00209g          12.830300          12.878901       12.730468
## CAGL0A00231g           8.731257           8.427881        9.468714
## CAGL0A00253g          11.263050          11.135540       11.118469
##              BG2_RPMI_VORI_B BG2_RPMI_FLUC_B CBS138_RPMI_DMSO_C
## CAGL0H10626g        5.239410        5.617187           7.879963
## CAGL0A00165g        9.723904        9.631222           9.758397
## CAGL0A00187g       11.239058       11.554708          11.019457
## CAGL0A00209g       12.646469       12.694132          12.838603
## CAGL0A00231g        8.581103        8.570093           9.332638
## CAGL0A00253g       11.140956       11.142942          11.186040
##              CBS138_RPMI_VORI_C CBS138_RPMI_FLUC_C BG2_RPMI_DMSO_C
## CAGL0H10626g           8.178808           8.388479        5.254219
## CAGL0A00165g          10.166875          10.021087        9.659647
## CAGL0A00187g          11.533233          11.203325       11.169054
## CAGL0A00209g          12.350243          12.666644       12.730393
## CAGL0A00231g           8.690081           8.954762        9.647514
## CAGL0A00253g          10.688575          11.326460       11.138934
##              BG2_RPMI_VORI_C BG2_RPMI_FLUC_C
## CAGL0H10626g        5.606251        5.549291
## CAGL0A00165g        9.623289        9.889308
## CAGL0A00187g       11.440342       11.419485
## CAGL0A00209g       12.563927       12.655815
## CAGL0A00231g        8.276823        8.672952
## CAGL0A00253g       10.997610       11.221235
rlog_all_df = as.data.frame(assay(rlog_all))

Clustering on rlog across strains

Clustering on normalized rlog across strains

Normalized rlog gets at relative (not absolute) differences in expression between strains. Here we additionally filter for a minimal count of 100, to remove low-count noise.

These clustering analyses support:

  • clustering by - / + drug dominates clustering by strain
  • the difference between the two drugs is minimal
  • within drug treated samples, strains cluster.
  • CBS138_RPMI_VORI_C is an outlier

We are not trying to get any more information out of this analysis, e.g. not trying to identify clusters of co-regulated genes, so there is no need to do more arranging the plots.

Quality Control - Principal Component Analysis

Calculate principal components from rlog.

# calculate principal components of rlog, after extracting from the dataset
pca_rlog <- rlog_all %>%
  assay() %>%
  t() %>%
  prcomp()

# convert principal components to data frame
pcdf_rlog <- bind_cols(
  as_tibble(colData(rlog_all)),
  as_tibble(pca_rlog$x)
)

pcdf_rlog
## # A tibble: 18 × 28
##    SampleID Code    Medium Temp  Time  Rep   Species Strain Condition sizeFactor
##       <dbl> <fct>   <chr>  <chr> <chr> <chr> <chr>   <chr>  <chr>          <dbl>
##  1       86 CBS138… RPMI_… 37C   4h    A     Candid… CBS138 CBS138_R…      0.368
##  2       87 CBS138… RPMI_… 37C   4h    A     Candid… CBS138 CBS138_R…      0.851
##  3       88 CBS138… RPMI_… 37C   4h    A     Candid… CBS138 CBS138_R…      0.870
##  4       89 BG2_RP… RPMI_… 37C   4h    A     Candid… BG2    BG2_RPMI…      2.29 
##  5       90 BG2_RP… RPMI_… 37C   4h    A     Candid… BG2    BG2_RPMI…      3.14 
##  6       91 BG2_RP… RPMI_… 37C   4h    A     Candid… BG2    BG2_RPMI…      2.56 
##  7       92 CBS138… RPMI_… 37C   4h    B     Candid… CBS138 CBS138_R…      0.650
##  8       93 CBS138… RPMI_… 37C   4h    B     Candid… CBS138 CBS138_R…      0.775
##  9       94 CBS138… RPMI_… 37C   4h    B     Candid… CBS138 CBS138_R…      0.923
## 10       98 BG2_RP… RPMI_… 37C   4h    B     Candid… BG2    BG2_RPMI…      2.25 
## 11       99 BG2_RP… RPMI_… 37C   4h    B     Candid… BG2    BG2_RPMI…      3.06 
## 12      100 BG2_RP… RPMI_… 37C   4h    B     Candid… BG2    BG2_RPMI…      0.435
## 13       95 CBS138… RPMI_… 37C   4h    C     Candid… CBS138 CBS138_R…      0.510
## 14       96 CBS138… RPMI_… 37C   4h    C     Candid… CBS138 CBS138_R…      0.318
## 15       97 CBS138… RPMI_… 37C   4h    C     Candid… CBS138 CBS138_R…      2.98 
## 16      101 BG2_RP… RPMI_… 37C   4h    C     Candid… BG2    BG2_RPMI…      2.52 
## 17      102 BG2_RP… RPMI_… 37C   4h    C     Candid… BG2    BG2_RPMI…      0.323
## 18      103 BG2_RP… RPMI_… 37C   4h    C     Candid… BG2    BG2_RPMI…      0.403
## # ℹ 18 more variables: PC1 <dbl>, PC2 <dbl>, PC3 <dbl>, PC4 <dbl>, PC5 <dbl>,
## #   PC6 <dbl>, PC7 <dbl>, PC8 <dbl>, PC9 <dbl>, PC10 <dbl>, PC11 <dbl>,
## #   PC12 <dbl>, PC13 <dbl>, PC14 <dbl>, PC15 <dbl>, PC16 <dbl>, PC17 <dbl>,
## #   PC18 <dbl>
propvar_rlog_df <- tibble(
  PC = seq.int(1L, ncol(pca_rlog$x) ),
  tot_var = pca_rlog$sdev^2,
  prop_var = tot_var/sum(tot_var)
)

propvar_rlog_df
## # A tibble: 18 × 3
##       PC  tot_var prop_var
##    <int>    <dbl>    <dbl>
##  1     1 1.68e+ 2 3.58e- 1
##  2     2 1.55e+ 2 3.31e- 1
##  3     3 6.87e+ 1 1.46e- 1
##  4     4 1.82e+ 1 3.88e- 2
##  5     5 1.60e+ 1 3.40e- 2
##  6     6 1.26e+ 1 2.68e- 2
##  7     7 5.75e+ 0 1.22e- 2
##  8     8 4.79e+ 0 1.02e- 2
##  9     9 3.80e+ 0 8.08e- 3
## 10    10 3.32e+ 0 7.07e- 3
## 11    11 3.07e+ 0 6.54e- 3
## 12    12 2.48e+ 0 5.28e- 3
## 13    13 2.09e+ 0 4.45e- 3
## 14    14 1.78e+ 0 3.79e- 3
## 15    15 1.29e+ 0 2.75e- 3
## 16    16 1.14e+ 0 2.44e- 3
## 17    17 1.04e+ 0 2.21e- 3
## 18    18 1.07e-26 2.29e-29
plot_percentvar_rlog <- 
  ggplot(data = propvar_rlog_df, 
         aes(x = PC, y = prop_var)) +
  geom_col(fill = "skyblue3") +
  scale_x_continuous("principal component",
                     limits = c(0.4,10.6), 
                     breaks = 1L:10L,
                     expand = c(0,0)) + 
  scale_y_continuous("prop. of variance", expand = c(0,0))
plot_percentvar_rlog
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_col()`).

scales_PCA <- 
  list(
    scale_colour_manual("Strain, Treatment",
      values = c("CBS138_RPMI_DMSO" = "grey20",
                 "CBS138_RPMI_FLUC" = "purple3",
                 "CBS138_RPMI_VORI" = "green4",
                 "BG2_RPMI_DMSO" = "grey20",
                 "BG2_RPMI_FLUC" = "purple3",
                 "BG2_RPMI_VORI" = "green4"),
      labels = c("CBS138_RPMI_DMSO" = "CBS138, CT",
                 "CBS138_RPMI_FLUC" = "CBS138, FCZ",
                 "CBS138_RPMI_VORI" = "CBS138, VCZ",
                 "BG2_RPMI_DMSO"    = "BG2, CT",
                 "BG2_RPMI_FLUC"    = "BG2, FCZ",
                 "BG2_RPMI_VORI"    = "BG2, VCZ")),
    scale_shape_manual("Strain, Treatment",
      values = c("CBS138_RPMI_DMSO" = 16,
                 "CBS138_RPMI_FLUC" = 16,
                 "CBS138_RPMI_VORI" = 16,
                 "BG2_RPMI_DMSO" = 17,
                 "BG2_RPMI_FLUC" = 17,
                 "BG2_RPMI_VORI" = 17),
      labels = c("CBS138_RPMI_DMSO" = "CBS138, CT",
                 "CBS138_RPMI_FLUC" = "CBS138, FCZ",
                 "CBS138_RPMI_VORI" = "CBS138, VCZ",
                 "BG2_RPMI_DMSO"    = "BG2, CT",
                 "BG2_RPMI_FLUC"    = "BG2, FCZ",
                 "BG2_RPMI_VORI"    = "BG2, VCZ"))
  )


pc12 = ggplot(data = pcdf_rlog,
       aes(colour = Condition, shape = Condition)
       ) +
  geom_hline(yintercept = 0, colour = "grey90") +
  geom_vline(xintercept = 0, colour = "grey90") +
  geom_point(aes(x = PC1, y = PC2), size = 2) +
  theme(legend.box = "horizontal") +
  scales_PCA
legend = get_legend(pc12)
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in get_plot_component(plot, "guide-box"): Multiple components found;
## returning the first one. To return all, use `return_all = TRUE`.
pc12

pc32 = ggplot(data = pcdf_rlog,
       aes(colour = Condition, shape = Condition)
       ) +
  geom_hline(yintercept = 0, colour = "grey90") +
  geom_vline(xintercept = 0, colour = "grey90") +
  geom_point(aes(x = PC3, y = PC2), size = 2) +
  scales_PCA
pc32

ggplot(data = pcdf_rlog,
       aes(colour = Condition, shape = Rep)
       ) +
  geom_point(aes(x = PC3, y = PC2)) +
  scale_shape_identity()

# TODO: change styling to include shapes: circles squares triangles, maybe add Vs and Fs

Conclusions:

In PC1 vs PC2 there are 4 clear clusters: corresponding to treated and untreated strains BG2 and CBS138. In PC3 they look more similar.

The outlier: CBS138_RPMI_VORI_C (light purple) clusters with other bio reps in PC1-PC2, but not in PC3. After discussion, we decided that something was not quite right in that replicate.

Differential Gene Expression analysis using DESeq2 in each strain separately

Differential Gene Expression analysis was performed using DESeq2. Genes with adjusted p-value < 0.05 and at least 2x change are considered differentially expressed here. The upregulated and downregulated genes are saved in ../data/deseq2

DGE, CBS138, FCZ vs CT

dds_compared_conditions_cf = 
  run_deseq_vsmedium(counts_all_CBS138,
                     conditions_to_compare = c("CBS138_RPMI_DMSO", "CBS138_RPMI_FLUC"),
                     strains_to_include = "CBS138")

dds_compared_conditions_cf
## class: DESeqDataSet 
## dim: 5209 6 
## metadata(1): version
## assays(4): counts mu H cooks
## rownames(5209): CAGL0A00165g CAGL0A00187g ... CaglfMp11 CaglfMp12
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(6): CBS138_RPMI_DMSO_A CBS138_RPMI_FLUC_A ...
##   CBS138_RPMI_DMSO_C CBS138_RPMI_FLUC_C
## colData names(10): SampleID Code ... Condition sizeFactor
results(dds_compared_conditions_cf)
## log2 fold change (MLE): Medium RPMI FLUC vs RPMI DMSO 
## Wald test p-value: Medium RPMI FLUC vs RPMI DMSO 
## DataFrame with 5209 rows and 6 columns
##               baseMean log2FoldChange     lfcSE       stat    pvalue      padj
##              <numeric>      <numeric> <numeric>  <numeric> <numeric> <numeric>
## CAGL0A00165g   836.639      0.3197520  0.174032  1.8373124 0.0661638 0.1263219
## CAGL0A00187g  1591.597      0.2931970  0.179392  1.6343944 0.1021761 0.1790316
## CAGL0A00209g  6112.252      0.0175113  0.178075  0.0983363 0.9216652 0.9458500
## CAGL0A00231g   428.139     -0.6982981  0.308496 -2.2635540 0.0236016 0.0542966
## CAGL0A00253g  1940.918      0.2130427  0.173391  1.2286835 0.2191905 0.3298048
## ...                ...            ...       ...        ...       ...       ...
## CaglfMp08      1.19313      -1.383433  2.018901 -0.6852404 0.4931923        NA
## CaglfMp09     14.09621       0.318143  0.768816  0.4138090 0.6790140        NA
## CaglfMp10      1.55326      -0.160372  1.784641 -0.0898622 0.9283967        NA
## CaglfMp11    515.40070      -0.557760  0.296941 -1.8783528 0.0603329  0.117155
## CaglfMp12      9.10294       0.419579  1.024048  0.4097258 0.6820071        NA
rN = resultsNames(dds_compared_conditions_cf)
rN
## [1] "Intercept"                     "Medium_RPMI_FLUC_vs_RPMI_DMSO"
deseq_df_cf <- deseq_df_transform(dds_compared_conditions_cf, 
                               termstokeep = rN[2])

DEGdf_up2x_FDR5_cf = get_upregulated_genes(deseq_df_cf)
DEGdf_down2x_FDR5_cf = get_downregulated_genes(deseq_df_cf)
DEGdf_non_significant_cf = get_non_significant_genes(deseq_df_cf)

write.csv(deseq_df_cf, "../data/deseq2/CBS138/CBS138_FLUC_DMSO_allDESeq.csv")


p_cf = volcano_plot(deseq_df_cf, DEGdf_up2x_FDR5_cf, DEGdf_down2x_FDR5_cf,
                  xlabel = "log2 fold-change\n← down, FCZ      up, FCZ →") +
  ggtitle("FCZ vs CT, Strain CBS138")
#p_cf

DGE, CBS138, VCZ vs CT

dds_compared_conditions_cv = 
  run_deseq_vsmedium(counts_all_CBS138,
                     conditions_to_compare = c("CBS138_RPMI_DMSO", "CBS138_RPMI_VORI"),
                     strains_to_include = "CBS138")
dds_compared_conditions_cv
## class: DESeqDataSet 
## dim: 5209 6 
## metadata(1): version
## assays(4): counts mu H cooks
## rownames(5209): CAGL0A00165g CAGL0A00187g ... CaglfMp11 CaglfMp12
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(6): CBS138_RPMI_DMSO_A CBS138_RPMI_VORI_A ...
##   CBS138_RPMI_DMSO_C CBS138_RPMI_VORI_C
## colData names(10): SampleID Code ... Condition sizeFactor
results(dds_compared_conditions_cv)
## log2 fold change (MLE): Medium RPMI VORI vs RPMI DMSO 
## Wald test p-value: Medium RPMI VORI vs RPMI DMSO 
## DataFrame with 5209 rows and 6 columns
##               baseMean log2FoldChange     lfcSE      stat     pvalue      padj
##              <numeric>      <numeric> <numeric> <numeric>  <numeric> <numeric>
## CAGL0A00165g   602.177      0.5489343  0.253149  2.168422  0.0301266  0.107496
## CAGL0A00187g  1114.970      0.4538980  0.354983  1.278649  0.2010207  0.391611
## CAGL0A00209g  3724.394     -0.2077371  0.312624 -0.664496  0.5063732  0.691691
## CAGL0A00231g   260.923     -1.0150217  0.406456 -2.497247  0.0125162  0.057835
## CAGL0A00253g  1147.867     -0.0838943  0.345653 -0.242713  0.8082281  0.898777
## ...                ...            ...       ...       ...        ...       ...
## CaglfMp08      1.05901      -0.228622   2.10445 -0.108637 0.91349024        NA
## CaglfMp09      7.31921      -0.385422   1.07051 -0.360036 0.71881991 0.8484567
## CaglfMp10      1.05901      -0.228622   2.10445 -0.108637 0.91349024        NA
## CaglfMp11    281.70578      -1.337513   0.51890 -2.577593 0.00994911 0.0494646
## CaglfMp12      4.71152      -0.162560   1.34394 -0.120958 0.90372448 0.9500550
rN = resultsNames(dds_compared_conditions_cv)
rN
## [1] "Intercept"                     "Medium_RPMI_VORI_vs_RPMI_DMSO"
deseq_df_cv <- deseq_df_transform(dds_compared_conditions_cv, 
                               termstokeep = rN[2])

DEGdf_up2x_FDR5_cv = get_upregulated_genes(deseq_df_cv)
DEGdf_down2x_FDR5_cv = get_downregulated_genes(deseq_df_cv)
DEGdf_non_significant_cv = get_non_significant_genes(deseq_df_cv)

write.csv(deseq_df_cv, "../data/deseq2/CBS138/CBS138_VORI_DMSO_allDESeq.csv")


p_cv = volcano_plot(deseq_df_cv, DEGdf_up2x_FDR5_cv, DEGdf_down2x_FDR5_cv,
                  xlabel = "log2 fold-change\n← down, VCZ      up, VCZ →") +
  ggtitle("VCZ vs CT, Strain CBS138")
#p_cv

Check: DGE, CBS138, FCZ vs CT excluding the outlying sample CBS138_RPMI_VORI_C

Conclude: the single outlying sample CBS138_RPMI_VORI_C has minimal effect on the substantial results. Although it changes the genelists over the cutoff, it doesn’t change much most of the fold changes and p-values.

dds_compared_conditions_xc = 
  run_deseq_vsmedium(counts_all_CBS138,
                     conditions_to_compare = c("CBS138_RPMI_DMSO", "CBS138_RPMI_VORI"),
                     strains_to_include = "CBS138",
                     sample_sheet = dplyr::filter(sample_sheet_all,
                                                  Code != "CBS138_RPMI_VORI_C"))
dds_compared_conditions_xc
## class: DESeqDataSet 
## dim: 5209 5 
## metadata(1): version
## assays(4): counts mu H cooks
## rownames(5209): CAGL0A00165g CAGL0A00187g ... CaglfMp11 CaglfMp12
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(5): CBS138_RPMI_DMSO_A CBS138_RPMI_VORI_A CBS138_RPMI_DMSO_B
##   CBS138_RPMI_VORI_B CBS138_RPMI_DMSO_C
## colData names(10): SampleID Code ... Condition sizeFactor
results(dds_compared_conditions_xc)
## log2 fold change (MLE): Medium RPMI VORI vs RPMI DMSO 
## Wald test p-value: Medium RPMI VORI vs RPMI DMSO 
## DataFrame with 5209 rows and 6 columns
##               baseMean log2FoldChange     lfcSE        stat     pvalue
##              <numeric>      <numeric> <numeric>   <numeric>  <numeric>
## CAGL0A00165g   644.278      0.5271746  0.250260   2.1065089 0.03516017
## CAGL0A00187g  1074.564      0.0717238  0.227601   0.3151291 0.75266364
## CAGL0A00209g  4496.242      0.0194891  0.197717   0.0985705 0.92147929
## CAGL0A00231g   306.054     -1.1249116  0.399868  -2.8132084 0.00490498
## CAGL0A00253g  1393.269      0.1775422  0.206492   0.8598024 0.38989797
## ...                ...            ...       ...         ...        ...
## CaglfMp08      1.04557    -0.72864009  2.344279 -0.31081633  0.7559403
## CaglfMp09      9.46857     0.00909591  0.944194  0.00963351  0.9923137
## CaglfMp10      1.04557    -0.72864009  2.344279 -0.31081633  0.7559403
## CaglfMp11    348.06393    -1.23921807  0.516927 -2.39727857  0.0165174
## CaglfMp12      5.94529     0.16932639  1.274728  0.13283340  0.8943251
##                   padj
##              <numeric>
## CAGL0A00165g 0.0963664
## CAGL0A00187g 0.8409401
## CAGL0A00209g 0.9499427
## CAGL0A00231g 0.0209321
## CAGL0A00253g 0.5471509
## ...                ...
## CaglfMp08           NA
## CaglfMp09           NA
## CaglfMp10           NA
## CaglfMp11    0.0529289
## CaglfMp12           NA
rN = resultsNames(dds_compared_conditions_xc)
rN
## [1] "Intercept"                     "Medium_RPMI_VORI_vs_RPMI_DMSO"
deseq_df_xc <- deseq_df_transform(dds_compared_conditions_xc, 
                               termstokeep = rN[2])

DEGdf_up2x_FDR5_cvxc = get_upregulated_genes(deseq_df_xc)
DEGdf_down2x_FDR5_cvxc = get_downregulated_genes(deseq_df_xc)
DEGdf_non_significant_cvxc = get_non_significant_genes(deseq_df_xc)

# write.csv(deseq_df_xc, "../data/deseq2/CBS138/CBS138_VORI_DMSO_allDESeq.csv")


p_cvx = volcano_plot(deseq_df_xc, DEGdf_up2x_FDR5_cvxc, DEGdf_down2x_FDR5_cvxc,
                    xlabel = "log2 fold-change\n← down, VCZ      up, VCZ →") +
  ggtitle("VCZ vs CT, Strain CBS138, exclude outlying sample")
#p_cvx

deseq_df_3repsvsxc <- 
  dplyr::inner_join(deseq_df_cv, 
              deseq_df_xc, 
              by = "gene",
              suffix = c(".all", ".xc"))

ggplot(data = deseq_df_3repsvsxc, aes(x = log2FC.all, y = log2FC.xc)) + 
  geom_hline(yintercept = c(-1, 1), colour = "purple", linetype = "dashed") + 
  geom_vline(xintercept = c(-1, 1), colour = "purple", linetype = "dashed") +
  geom_abline(intercept = 0, slope = 1, colour = "purple") + 
    geom_point(size = 0.2, colour = "grey50") +
    geom_point(data = dplyr::filter(deseq_df_3repsvsxc,
                             gene %in% DEGdf_up2x_FDR5_cvxc$gene),
               size = 1, colour = "darkblue") +
    geom_point(data = dplyr::filter(deseq_df_3repsvsxc,
                             gene %in% DEGdf_down2x_FDR5_cvxc$gene),
               size = 1, colour = "darkred")

DGE, CBS138, VCZ vs FCZ

dds_compared_conditions_cvf <- 
  run_deseq_vsmedium(counts_all_CBS138,
                     conditions_to_compare = c("CBS138_RPMI_FLUC", "CBS138_RPMI_VORI"),
                     strains_to_include = "CBS138")

dds_compared_conditions_cvf
## class: DESeqDataSet 
## dim: 5209 6 
## metadata(1): version
## assays(4): counts mu H cooks
## rownames(5209): CAGL0A00165g CAGL0A00187g ... CaglfMp11 CaglfMp12
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(6): CBS138_RPMI_VORI_A CBS138_RPMI_FLUC_A ...
##   CBS138_RPMI_VORI_C CBS138_RPMI_FLUC_C
## colData names(10): SampleID Code ... Condition sizeFactor
results(dds_compared_conditions_cvf)
## log2 fold change (MLE): Medium RPMI VORI vs RPMI FLUC 
## Wald test p-value: Medium RPMI VORI vs RPMI FLUC 
## DataFrame with 5209 rows and 6 columns
##               baseMean log2FoldChange     lfcSE       stat    pvalue      padj
##              <numeric>      <numeric> <numeric>  <numeric> <numeric> <numeric>
## CAGL0A00165g  1101.737       0.207888  0.209804   0.990867  0.321751  0.735696
## CAGL0A00187g  2030.501       0.142320  0.337058   0.422242  0.672849  0.871859
## CAGL0A00209g  6232.873      -0.246693  0.309467  -0.797152  0.425363  0.783094
## CAGL0A00231g   322.077      -0.332331  0.420327  -0.790648  0.429149  0.786189
## CAGL0A00253g  2064.212      -0.317729  0.337930  -0.940222  0.347104  0.745921
## ...                ...            ...       ...        ...       ...       ...
## CaglfMp08      1.20986       1.128652  2.015912  0.5598716 0.5755670  0.836253
## CaglfMp09     13.45210      -0.687300  0.978899 -0.7021156 0.4826071  0.804700
## CaglfMp10      1.60583      -0.102210  1.799630 -0.0567951 0.9547084  0.982067
## CaglfMp11    360.79854      -0.796753  0.473493 -1.6827120 0.0924309  0.664200
## CaglfMp12      9.45453      -0.620352  1.265478 -0.4902118 0.6239840  0.852734
rN = resultsNames(dds_compared_conditions_cvf)
rN
## [1] "Intercept"                     "Medium_RPMI_VORI_vs_RPMI_FLUC"
deseq_df_cvf <- deseq_df_transform(dds_compared_conditions_cvf, 
                               termstokeep = rN[2])

DEGdf_up2x_FDR5_cvf = get_upregulated_genes(deseq_df_cvf)
DEGdf_down2x_FDR5_cvf = get_downregulated_genes(deseq_df_cvf)
DEGdf_non_significant_cvf = get_non_significant_genes(deseq_df_cvf)

write.csv(deseq_df_cvf, "../data/deseq2/CBS138/CBS138_VORI_FLUC_allDESeq.csv")

p_cvf = volcano_plot(deseq_df_cvf, DEGdf_up2x_FDR5_cvf, DEGdf_down2x_FDR5_cvf,
                    xlabel = "log2 fold-change\n← up, FCZ      up, VCZ →") +
  ggtitle("VCZ vs FCZ, Strain CBS138")
#p_cvf

Double-check: DGE, CBS138, VCZ vs FCZ excluding the outlying sample CBS138_RPMI_VORI_C

dds_compared_conditions_cvfxc <-
  run_deseq_vsmedium(counts_all_CBS138,
                     conditions_to_compare = c("CBS138_RPMI_FLUC", "CBS138_RPMI_VORI"),
                     strains_to_include = "CBS138",
                     sample_sheet = dplyr::filter(sample_sheet_all,
                                                  Code != "CBS138_RPMI_VORI_C"))

dds_compared_conditions_cvfxc
## class: DESeqDataSet 
## dim: 5209 5 
## metadata(1): version
## assays(4): counts mu H cooks
## rownames(5209): CAGL0A00165g CAGL0A00187g ... CaglfMp11 CaglfMp12
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(5): CBS138_RPMI_VORI_A CBS138_RPMI_FLUC_A CBS138_RPMI_VORI_B
##   CBS138_RPMI_FLUC_B CBS138_RPMI_FLUC_C
## colData names(10): SampleID Code ... Condition sizeFactor
results(dds_compared_conditions_cvfxc)
## log2 fold change (MLE): Medium RPMI VORI vs RPMI FLUC 
## Wald test p-value: Medium RPMI VORI vs RPMI FLUC 
## DataFrame with 5209 rows and 6 columns
##               baseMean log2FoldChange     lfcSE       stat    pvalue      padj
##              <numeric>      <numeric> <numeric>  <numeric> <numeric> <numeric>
## CAGL0A00165g  1326.551      0.1862951  0.202049  0.9220281  0.356514  0.804471
## CAGL0A00187g  2225.827     -0.2420684  0.164808 -1.4687912  0.141889  0.717400
## CAGL0A00209g  8281.619     -0.0190084  0.194410 -0.0977745  0.922111  0.985455
## CAGL0A00231g   394.159     -0.4441326  0.393893 -1.1275448  0.259512  0.777751
## CAGL0A00253g  2776.539     -0.0556065  0.186938 -0.2974592  0.766116  0.955101
## ...                ...            ...       ...        ...       ...       ...
## CaglfMp08      1.09792       0.613396  2.467883   0.248552  0.803708  0.962591
## CaglfMp09     19.15890      -0.318339  0.867010  -0.367169  0.713493  0.937821
## CaglfMp10      1.67950      -0.571602  2.132226  -0.268078  0.788639  0.962591
## CaglfMp11    477.06310      -0.699553  0.441275  -1.585301  0.112898  0.715920
## CaglfMp12     13.27455      -0.279166  1.192923  -0.234018  0.814971  0.965527
rN = resultsNames(dds_compared_conditions_cvfxc)
rN
## [1] "Intercept"                     "Medium_RPMI_VORI_vs_RPMI_FLUC"
deseq_df_cvfxc <- deseq_df_transform(dds_compared_conditions_cvfxc, 
                               termstokeep = rN[2])

DEGdf_up2x_FDR5_cvfxc = get_upregulated_genes(deseq_df_cvfxc)
DEGdf_down2x_FDR5_cvfxc = get_downregulated_genes(deseq_df_cvfxc)

volcano_plot(deseq_df_cvfxc, DEGdf_up2x_FDR5_cvfxc, DEGdf_down2x_FDR5_cvfxc,
                    xlabel = "log2 fold-change\n← up, FCZ      up, VCZ →") +
  ggtitle("VCZ vs FCZ, Strain CBS138")

Again this does not substantially change outcome: 7 DE genes when the sample is excluded, instead of zero.

DGE, BG2, FCZ vs CT

dds_compared_conditions_bf = 
  run_deseq_vsmedium(counts_all_BG2,
                     conditions_to_compare = c("BG2_RPMI_DMSO", "BG2_RPMI_FLUC"),
                     strains_to_include = "BG2")

dds_compared_conditions_bf
## class: DESeqDataSet 
## dim: 5234 6 
## metadata(1): version
## assays(4): counts mu H cooks
## rownames(5234): GWK60_A00011 GWK60_A00033 ... GWK60_M14047 GWK60_M14597
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(6): BG2_RPMI_DMSO_A BG2_RPMI_FLUC_A ... BG2_RPMI_DMSO_C
##   BG2_RPMI_FLUC_C
## colData names(10): SampleID Code ... Condition sizeFactor
results(dds_compared_conditions_bf)
## log2 fold change (MLE): Medium RPMI FLUC vs RPMI DMSO 
## Wald test p-value: Medium RPMI FLUC vs RPMI DMSO 
## DataFrame with 5234 rows and 6 columns
##                baseMean log2FoldChange     lfcSE      stat     pvalue      padj
##               <numeric>      <numeric> <numeric> <numeric>  <numeric> <numeric>
## GWK60_A00011    2.45412       2.770152  1.811888  1.528876 0.12629510  0.190938
## GWK60_A00033   63.40106      -0.112474  0.369791 -0.304155 0.76100947  0.820924
## GWK60_A00055  994.93649       0.262738  0.158995  1.652484 0.09843592  0.154071
## GWK60_A00077 3634.13071       0.404350  0.148619  2.720713 0.00651413  0.013871
## GWK60_A00099 8677.47644      -0.115410  0.102964 -1.120876 0.26234046  0.354232
## ...                 ...            ...       ...       ...        ...       ...
## GWK60_M13981  295.28461      -0.387677  0.229315 -1.690587 0.09091563 0.1444603
## GWK60_M14003  165.85087       0.337904  0.265481  1.272800 0.20308897 0.2852308
## GWK60_M14025  657.87791       0.476554  0.173193  2.751568 0.00593107 0.0127645
## GWK60_M14047   25.63011      -0.436302  0.694362 -0.628349 0.52977531 0.6210177
## GWK60_M14597    1.43472       1.809929  2.069943  0.874386 0.38190826 0.4805067
rN = resultsNames(dds_compared_conditions_bf)
rN
## [1] "Intercept"                     "Medium_RPMI_FLUC_vs_RPMI_DMSO"
deseq_df_bf <- deseq_df_transform(dds_compared_conditions_bf, 
                               termstokeep = rN[2])

DEGdf_up2x_FDR5_bf = get_upregulated_genes(deseq_df_bf)
DEGdf_down2x_FDR5_bf = get_downregulated_genes(deseq_df_bf)
DEGdf_non_significant_bf = get_non_significant_genes(deseq_df_bf)

write.csv(deseq_df_bf, "../data/deseq2/BG2/BG2_FLUC_DMSO_allDESeq.csv")

p_bf = volcano_plot(deseq_df_bf, DEGdf_up2x_FDR5_bf, DEGdf_down2x_FDR5_bf,
                    xlabel = "log2 fold-change\n← down, FCZ       up, FCZ →") +
  ggtitle("FCZ vs CT, Strain BG2")
#p_bf

DGE, BG2, VCZ vs CT

dds_compared_conditions_bv <-
  run_deseq_vsmedium(counts_all_BG2,
                     conditions_to_compare = c("BG2_RPMI_DMSO", "BG2_RPMI_VORI"),
                     strains_to_include = "BG2")
dds_compared_conditions_bv
## class: DESeqDataSet 
## dim: 5234 6 
## metadata(1): version
## assays(4): counts mu H cooks
## rownames(5234): GWK60_A00011 GWK60_A00033 ... GWK60_M14047 GWK60_M14597
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(6): BG2_RPMI_DMSO_A BG2_RPMI_VORI_A ... BG2_RPMI_DMSO_C
##   BG2_RPMI_VORI_C
## colData names(10): SampleID Code ... Condition sizeFactor
results(dds_compared_conditions_bv)
## log2 fold change (MLE): Medium RPMI VORI vs RPMI DMSO 
## Wald test p-value: Medium RPMI VORI vs RPMI DMSO 
## DataFrame with 5234 rows and 6 columns
##                 baseMean log2FoldChange     lfcSE      stat      pvalue
##                <numeric>      <numeric> <numeric> <numeric>   <numeric>
## GWK60_A00011     1.54035       0.706157  1.961312  0.360043    0.718815
## GWK60_A00033    98.51274       0.277688  0.270251  1.027519    0.304176
## GWK60_A00055  1310.77609       0.160270  0.135683  1.181213    0.237518
## GWK60_A00077  4651.26132       0.214837  0.156645  1.371487    0.170223
## GWK60_A00099 11796.21786      -0.148496  0.115114 -1.289987    0.197055
## ...                  ...            ...       ...       ...         ...
## GWK60_M13981   392.94428     -0.4170382  0.212398 -1.963478 0.049590615
## GWK60_M14003   205.94934      0.0780224  0.225819  0.345509 0.729711701
## GWK60_M14025   932.23403      0.5581566  0.163028  3.423687 0.000617778
## GWK60_M14047    31.46214     -0.8795011  0.645888 -1.361692 0.173295062
## GWK60_M14597     1.54035      0.7061569  1.961312  0.360043 0.718814933
##                    padj
##               <numeric>
## GWK60_A00011   0.796081
## GWK60_A00033   0.418082
## GWK60_A00055   0.345229
## GWK60_A00077   0.263283
## GWK60_A00099   0.297058
## ...                 ...
## GWK60_M13981 0.09448754
## GWK60_M14003 0.80389624
## GWK60_M14025 0.00212168
## GWK60_M14047 0.26732283
## GWK60_M14597 0.79608069
rN = resultsNames(dds_compared_conditions_bv)
rN
## [1] "Intercept"                     "Medium_RPMI_VORI_vs_RPMI_DMSO"
deseq_df_bv <- deseq_df_transform(dds_compared_conditions_bv, 
                               termstokeep = rN[2])

DEGdf_up2x_FDR5_bv = get_upregulated_genes(deseq_df_bv)
DEGdf_down2x_FDR5_bv = get_downregulated_genes(deseq_df_bv)
DEGdf_non_significant_bv = get_non_significant_genes(deseq_df_bv)

write.csv(deseq_df_bv, "../data/deseq2/BG2/BG2_VORI_DMSO_allDESeq.csv")

p_bv = volcano_plot(deseq_df_bv, DEGdf_up2x_FDR5_bv, DEGdf_down2x_FDR5_bv,
                    xlabel = "log2 fold-change\n← down, VCZ       up, VCZ →") +
  ggtitle("VCZ vs CT, Strain BG2")
#p_bv

DGE, BG2, VCZ vs FCZ

dds_compared_conditions_bvf <-
  run_deseq_vsmedium(counts_all_BG2,
                     conditions_to_compare = c("BG2_RPMI_FLUC", "BG2_RPMI_VORI"),
                     strains_to_include = "BG2")

dds_compared_conditions_bvf
## class: DESeqDataSet 
## dim: 5234 6 
## metadata(1): version
## assays(4): counts mu H cooks
## rownames(5234): GWK60_A00011 GWK60_A00033 ... GWK60_M14047 GWK60_M14597
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(6): BG2_RPMI_VORI_A BG2_RPMI_FLUC_A ... BG2_RPMI_VORI_C
##   BG2_RPMI_FLUC_C
## colData names(10): SampleID Code ... Condition sizeFactor
results(dds_compared_conditions_bvf)
## log2 fold change (MLE): Medium RPMI VORI vs RPMI FLUC 
## Wald test p-value: Medium RPMI VORI vs RPMI FLUC 
## DataFrame with 5234 rows and 6 columns
##                baseMean log2FoldChange     lfcSE      stat    pvalue      padj
##               <numeric>      <numeric> <numeric> <numeric> <numeric> <numeric>
## GWK60_A00011    2.37448     -2.0840627  1.927729 -1.081098  0.279654   0.99942
## GWK60_A00033   55.52361      0.3658448  0.414331  0.882977  0.377249   0.99942
## GWK60_A00055  826.42686     -0.1219869  0.183334 -0.665383  0.505806   0.99942
## GWK60_A00077 3077.75169     -0.2033374  0.172526 -1.178587  0.238562   0.99942
## GWK60_A00099 6510.95108     -0.0480715  0.131391 -0.365867  0.714464   0.99942
## ...                 ...            ...       ...       ...       ...       ...
## GWK60_M13981  197.61007     -0.0748445  0.293316 -0.255167  0.798594   0.99942
## GWK60_M14003  134.59536     -0.2783927  0.308730 -0.901735  0.367198   0.99942
## GWK60_M14025  622.75233      0.0673620  0.173017  0.389336  0.697027   0.99942
## GWK60_M14047   15.29155     -0.4542346  0.788648 -0.575966  0.564638   0.99942
## GWK60_M14597    1.56582     -1.0663244  2.144045 -0.497342  0.618948   0.99942
rN = resultsNames(dds_compared_conditions_bvf)
rN
## [1] "Intercept"                     "Medium_RPMI_VORI_vs_RPMI_FLUC"
deseq_df_bvf <- deseq_df_transform(dds_compared_conditions_bvf, 
                               termstokeep = rN[2])

DEGdf_up2x_FDR5_bvf = get_upregulated_genes(deseq_df_bvf)
DEGdf_down2x_FDR5_bvf = get_downregulated_genes(deseq_df_bvf)
DEGdf_non_significant_bvf = get_non_significant_genes(deseq_df_bvf)

write.csv(deseq_df_bvf, "../data/deseq2/BG2/BG2_VORI_FLUC_allDESeq.csv")

p_bvf = volcano_plot(deseq_df_bvf, DEGdf_up2x_FDR5_bvf, DEGdf_down2x_FDR5_bvf,
                    xlabel = "log2 fold-change\n← up, FCZ       up, VCZ →") +
  ggtitle("VCZ vs FCZ, Strain BG2")
# p_bvf

Volcano plots of pairwise comparisons

How many genes were differentially expressed in each condition? [TODO: make it a figure]

CBS138:

## [1] "Upregulated CBS138 FCZ vs CT: 321"
## [1] "Downregulated CBS138 FCZ vs CT: 409"
## [1] "Upregulated in CBS138 VCZ vs CT: 338"
## [1] "Downregulated in CBS138 VCZ vs CT: 353"
## [1] "Upregulated in CBS138 VCZ vs FCZ: 0"
## [1] "Downregulated in CBS138 VCZ vs FCZ: 0"

BG2:

## [1] "Upregulated in BG2 FCZ vs CT: 271"
## [1] "Downregulated in BG2 FCZ vs CT: 450"
## [1] "Upregulated in BG2 VCZ vs CT: 234"
## [1] "Downregulated in BG2 VCZ vs CT: 328"
## [1] "Upregulated in BG2 VCZ vs FCZ: 0"
## [1] "Downregulated in BG2 VCZ vs FCZ: 0"

Conclusions:

  • no difference between FCZ and VCZ effect in either strain.
  • small but comparable number of up/downregulated genes in both strains.
  • the CBS138_FLUC outlier impacts the differential expression analysis, but not in a way that changes the overall conclusions, so it is reasonable to proceed with the analysis on all 3 replicates.

Identification of corresponding genes in S. cerevisiae and between the two C.glabrata strains

Using curated orthologs between C.glabrata CBS138 and S.cerevisiae (downloaded from candidagenome.org), I identified standard and systematic S. cerevisiae names for C.glabrata genes up- and down-regulated in FCZ vs CT. There is no need to do repeat it in VCZ, because there are no significant DEG between these two conditions.

Orthologs are also identified between C.glabrata strains using Reciprocal Blast Best Hits.

Here I look at overlap between genes upregulated/downregulated in FCZ treatment between CBS138 and BG2. This includes:

  • reporting the number of genes that are DE in one strain and have a homolog in the other - this shows how many genes unique for the strain are DE
  • reporting the number of genes that are DE in both strain - this shows whether the response is similar between CBS138 and BG2.
## [1] "Number of FCZ upregulated genes in CBS138: 321 that have a homolog in BG2: 314"
## [1] "Number of FCZ downregulated genes in CBS138: 409 that have a homolog in BG2: 405"
## [1] "Number of FCZ upregulated genes in BG2: 271 that have a homolog in CBS138: 258"
## [1] "Number of FCZ downregulated genes in BG2: 450 that have a homolog in CBS138: 443"
## [1] "Number of FCZ upregulated genes overlapping between CBS138 and BG2: 169"
## [1] "Number of FCZ downregulated genes overlapping between CBS138 and BG2: 269"

Conclusions:

  • most upregulated genes have homologs in both strains
  • only a little more than 50% of genes differentially expressed in one strain are also differentially expressed in the other

Gene Ontology analysis of up- and down-regulated genes in CBS138 and BG2

Based on this, I used GO term enrichment (candidagenome.org) of up- and downregulated genes in CBS138, BG2 and common to both strains.

The results are in ../data/go/ folder.

Upregulated genes

Both strains up (selected):

  • secondary alcohol biosynthetic process
  • sterol and lipid biosynthetic process
  • sulfur compound metabolic process
  • secondary alcohol metabolic process
  • small molecule metabolic process
  • alcohol biosynthetic process
  • small molecule biosynthetic process
  • organic hydroxy compound biosynthetic process
  • homoserine metabolic process
  • non-proteinogenic amino acid metabolic process
  • sulfur amino acid biosynthetic process
  • aspartate family amino acid metabolic process
  • amino acid import across plasma membrane
  • L-methionine biosynthetic process

Only CBS138 up:

  • carbohydrate metabolic process
  • oligosaccharide metabolic process
  • monosaccharide metabolic process
  • trehalose metabolic process

BG2 only up (selected):

  • lysine biosynthetic process via aminoadipic acid
  • proteinogenic amino acid metabolic process
  • L-amino acid metabolic process
  • aspartate family amino acid biosynthetic process

Conclusions:

  • both strains have upregulation of ergosterol and lipid biosynthesis genes and small molecule metabolism genes (which also includes ERG genes)
  • BG2 DEG are more enriched in amino acid metabolism genes
  • CBS138 are more enriched in carbohydrate metabolism genes - I think these are generally upregulated in stress?

Downregulated genes

Both strains down (selected):

  • ribosome biogenes
  • nucleic acid catabolism

CBS138 only:

  • arginine biosynthetic process
  • arginine metabolic process
  • glutamine family amino acid biosynthetic process
  • ornithine metabolic process
  • arginine biosynthetic process via ornithine
  • glutamine family amino acid metabolic process
  • ribosome biogenesis

BG2 only:

  • translation
  • ribosome biogenesis

Conclusions:

  • in both strains mostly ribosome biogenesis genes are downregulated

Comparison with transcription factor targets

Using data from two publications, both in CBS138:

I looked at differential expression of two transcription factor targets involved in antifungal resistance. This part of the analysis in CBS138 only.

This is not enrichment analysis, just search for overlapping genes.

For comparison, I’m also showing targets of S. cerevisiae homologs of these two genes (targets downloaded from SGD).

## [1] "Number of PDR1 targets in C.glabrata: 25, number of upregulated genes: 261, number of downregulated genes: 344"
## [1] "Intersection UP:"
## [1] "ATF2" "YOR1" "BAG7" "RTA1" "LAF1"
## [1] "Intersection DOWN:"
## [1] "PDR16" "RSB1"
## [1] "Number of PDR1 targets in S. cerevisiae: 44, , number of upregulated genes: 261, number of downregulated genes: 344"
## [1] "Intersection UP:"
## [1] "GSC2" "VPS4"
## [1] "Intersection DOWN:"
## [1] "RPC53" "RPO26"
## [1] "Number of UPC2A targets in C. glabrata: 237, , number of upregulated genes: 261, number of downregulated genes: 344"
## [1] "Intersection UP:"
## [1] "ERG1"    "YOR1"    "YOR1"    "YOR1"    "YOR1"    "YNR014W" "YER158C"
## [8] "HEM13"   "ADP1"
## [1] "Intersection DOWN:"
##  [1] "PMA1"      "PMA1"      "YCL048W-A" "DSE3"      "YJR115W"   "TIR2"     
##  [7] "FUI1"      "FUI1"      "INA1"      "INA1"      "CWP1"      "CWP1"     
## [13] "CWP2"      "CWP2"      "SRP40"     "PTR2"      "TPO1"      "YDL211C"  
## [19] "SNG1"      "MRH1"      "RSB1"      "RSB1"      "GTT3"      "GTT3"     
## [25] "TOS1"      "TOS1"
## [1] "Number of UPC2 targets in S. cerevisiae: 16, , number of upregulated genes: 261, number of downregulated genes: 344"
## [1] "Intersection UP:"
##  [1] "PRR2"  "ERG25" "ERG1"  "ERG11" "ERG11" "NCP1"  "ERG7"  "ERG3"  "ERG3" 
## [10] "ERG5"  "ERG2"
## [1] "Intersection DOWN:"
## [1] "FHN1"

Conclusions:

  • I’m not sure how reliable this data is, for example most ERG genes do not show up among C. glabrata UPC2A targets, but do show up among S. cerevisiae Upc2; especially that the UPC2A datasets has more than 10x genes than UPC2.
  • given that there is little overlap between CgPdr1 and ScPdr1 targets, I don’t know if it’s worth looking at S. cerevisiae orthologs
  • using binding motifs might be a better idea.

Differential gene expression comparison between the two strains

dds_compared_conditions_cbd <-
  run_deseq_vsstrain(counts_all_joined,
                     conditions_to_compare = c("CBS138_RPMI_DMSO", "BG2_RPMI_DMSO"))

dds_compared_conditions_cbd
## class: DESeqDataSet 
## dim: 5124 6 
## metadata(1): version
## assays(4): counts mu H cooks
## rownames(5124): CAGL0H10626g CAGL0A00165g ... CAGL0M14069g CAGL0M14091g
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(6): CBS138_RPMI_DMSO_A BG2_RPMI_DMSO_A ... CBS138_RPMI_DMSO_C
##   BG2_RPMI_DMSO_C
## colData names(10): SampleID Code ... Condition sizeFactor
results(dds_compared_conditions_cbd)
## log2 fold change (MLE): Strain CBS138 vs BG2 
## Wald test p-value: Strain CBS138 vs BG2 
## DataFrame with 5124 rows and 6 columns
##               baseMean log2FoldChange     lfcSE      stat      pvalue
##              <numeric>      <numeric> <numeric> <numeric>   <numeric>
## CAGL0H10626g   202.939       9.794973  0.873020  11.21965 3.26561e-29
## CAGL0A00165g   850.457       0.447408  0.156982   2.85006 4.37109e-03
## CAGL0A00187g  2190.078      -0.399844  0.174776  -2.28776 2.21516e-02
## CAGL0A00209g  7604.857       0.155994  0.135849   1.14829 2.50847e-01
## CAGL0A00231g   923.115      -0.712847  0.238711  -2.98623 2.82441e-03
## ...                ...            ...       ...       ...         ...
## CAGL0M14003g  7912.720       0.768992  0.175128   4.39103 1.12816e-05
## CAGL0M14025g 20232.067      -0.314485  0.144817  -2.17160 2.98856e-02
## CAGL0M14047g   450.065       1.261566  0.246975   5.10807 3.25462e-07
## CAGL0M14069g   145.683       0.579965  0.290451   1.99677 4.58500e-02
## CAGL0M14091g   621.588       0.883936  0.191359   4.61924 3.85141e-06
##                     padj
##                <numeric>
## CAGL0H10626g 4.30365e-27
## CAGL0A00165g 1.77195e-02
## CAGL0A00187g 6.63057e-02
## CAGL0A00209g 4.08304e-01
## CAGL0A00231g 1.24622e-02
## ...                  ...
## CAGL0M14003g 1.08490e-04
## CAGL0M14025g 8.27750e-02
## CAGL0M14047g 4.53170e-06
## CAGL0M14069g 1.15846e-01
## CAGL0M14091g 4.08585e-05
rN = resultsNames(dds_compared_conditions_cbd)
rN
## [1] "Intercept"            "Strain_CBS138_vs_BG2"
deseq_df_cbd <- deseq_df_transform(dds_compared_conditions_cbd, 
                               termstokeep = rN[2])
write.csv(deseq_df_cbd, "../data/deseq2/CompareStrains/CBS138_BG2_DMSO_allDESeq.csv")

DEGdf_up2x_FDR5_cbd = get_upregulated_genes(deseq_df_cbd)
DEGdf_down2x_FDR5_cbd = get_downregulated_genes(deseq_df_cbd)
DEGdf_non_significant_cbd = get_non_significant_genes(deseq_df_cbd)

print(DEGdf_up2x_FDR5_cbd, n = 20)
## # A tibble: 194 × 5
##    gene         baseMean log2FC stderror      padj
##    <chr>           <dbl>  <dbl>    <dbl>     <dbl>
##  1 CAGL0K00110g 13966.    12.5     0.809 3.70e- 51
##  2 CAGL0C00110g  4327.    10.3     0.274 1.58e-303
##  3 CAGL0H10626g   203.     9.79    0.873 4.30e- 27
##  4 CAGL0E00187g   550.     8.81    0.816 3.86e- 25
##  5 CAGL0E06666g  2836.     8.68    0.823 6.27e- 24
##  6 CAGL0K13024g  1275.     8.65    0.334 9.32e-145
##  7 CAGL0B00264g    69.1    7.83    0.803 1.63e- 20
##  8 CAGL0M00132g    49.9    6.75    0.736 3.45e- 18
##  9 CAGL0G10219g    52.1    5.84    1.52  8.46e-  4
## 10 CAGL0I00220g    17.2    4.77    0.833 1.91e-  7
## 11 CAGL0A01221g  9209.     4.71    0.163 1.59e-179
## 12 CAGL0D06534g     7.72   4.60    1.08  1.82e-  4
## 13 CAGL0E00275g   178.     4.56    0.312 4.70e- 46
## 14 CAGL0B00990g  7455.     4.51    0.226 6.87e- 86
## 15 CAGL0J11891g    19.9    4.25    0.693 1.89e-  8
## 16 CAGL0H06039g   107.     4.19    0.446 4.61e- 19
## 17 CAGL0M11044g     4.25   4.15    1.41  1.37e-  2
## 18 CAGL0M13387g     4.19   4.12    1.52  2.49e-  2
## 19 CAGL0C00781g     4.06   4.07    1.37  1.26e-  2
## 20 CAGL0F08217g   101.     3.89    0.492 1.34e- 13
## # ℹ 174 more rows
print(DEGdf_down2x_FDR5_cbd, n = 20)
## # A tibble: 265 × 5
##    gene         baseMean log2FC stderror      padj
##    <chr>           <dbl>  <dbl>    <dbl>     <dbl>
##  1 CAGL0A02233g  11597.   -9.06    0.584 1.08e- 51
##  2 CAGL0J11550g   3730.   -7.13    0.300 4.86e-122
##  3 CAGL0C00209g     56.8  -5.71    0.898 5.05e-  9
##  4 CAGL0H04279g     42.6  -5.28    0.904 9.96e-  8
##  5 CAGL0H02563g   3100.   -5.20    0.237 1.27e-103
##  6 CAGL0K00407g   2119.   -4.55    0.234 6.80e- 82
##  7 CAGL0B01232g   2758.   -4.28    0.209 1.85e- 90
##  8 CAGL0A01826g   1997.   -4.21    0.640 1.24e-  9
##  9 CAGL0E05984g    979.   -4.15    0.276 1.41e- 48
## 10 CAGL0I06336g    558.   -3.78    0.234 5.22e- 56
## 11 CAGL0G02937g     50.6  -3.66    0.617 6.05e-  8
## 12 CAGL0G01738g   4149.   -3.63    0.168 2.13e-101
## 13 CAGL0F06677g     13.5  -3.53    1.04  3.72e-  3
## 14 CAGL0J04004g   1049.   -3.48    0.248 2.15e- 42
## 15 CAGL0D02244g     22.3  -3.44    0.881 6.75e-  4
## 16 CAGL0K04565g    123.   -3.33    0.384 2.77e- 16
## 17 CAGL0L05434g   6247.   -3.24    0.198 1.62e- 57
## 18 CAGL0K08932g    462.   -3.24    0.273 3.13e- 30
## 19 CAGL0J03080g   2302.   -3.11    0.270 1.44e- 28
## 20 CAGL0F05137g    103.   -3.10    0.423 8.22e- 12
## # ℹ 245 more rows
print(DEGdf_non_significant_cbd, n = 20)
## # A tibble: 3,436 × 5
##    gene         baseMean log2FC stderror   padj
##    <chr>           <dbl>  <dbl>    <dbl>  <dbl>
##  1 CAGL0H05995g    41.6  -0.994    0.713 0.302 
##  2 CAGL0J07194g    39.3  -0.983    0.540 0.158 
##  3 CAGL0D03762g     3.23 -0.957    1.43  0.664 
##  4 CAGL0L12100g    10.6  -0.947    0.881 0.443 
##  5 CAGL0E03630g    71.6  -0.922    0.503 0.155 
##  6 CAGL0I01100g 17986.   -0.895    0.665 0.321 
##  7 CAGL0K04191g    52.4  -0.884    0.387 0.0663
##  8 CAGL0L03916g    75.6  -0.866    0.426 0.109 
##  9 CAGL0I03586g    32.3  -0.864    0.514 0.199 
## 10 CAGL0H00506g  1416.   -0.845    0.384 0.0781
## 11 CAGL0M05115g     8.40 -0.837    0.910 0.523 
## 12 CAGL0L07942g     3.04 -0.827    1.47  0.721 
## 13 CAGL0F06655g    84.7  -0.825    0.375 0.0784
## 14 CAGL0J07282g    57.1  -0.809    0.362 0.0730
## 15 CAGL0G09625g     3.02 -0.808    1.39  0.711 
## 16 CAGL0J06974g   451.   -0.803    0.358 0.0722
## 17 CAGL0A04675g    55.4  -0.769    0.562 0.312 
## 18 CAGL0B03201g   220.   -0.767    0.348 0.0774
## 19 CAGL0B01969g   103.   -0.767    0.409 0.144 
## 20 CAGL0D00198g   988.   -0.753    0.322 0.0587
## # ℹ 3,416 more rows
p_cbd = volcano_plot(deseq_df_cbd, DEGdf_up2x_FDR5_cbd, DEGdf_down2x_FDR5_cbd,
                    xlabel = "log2 fold-change\nup, BG2      up, CBS138\n←                        →") +
  ggtitle("Strain BG2 vs CBS138\nin RPMI CT")
#p_cbd

print("Upregulated genes:")
## [1] "Upregulated genes:"
nrow(DEGdf_up2x_FDR5_cbd)
## [1] 194
print("Downregulated genes")
## [1] "Downregulated genes"
nrow(DEGdf_down2x_FDR5_cbd)
## [1] 265
dds_compared_conditions_cbv <-
  run_deseq_vsstrain(counts_all_joined,
                     conditions_to_compare = c("CBS138_RPMI_VORI", "BG2_RPMI_VORI"))

dds_compared_conditions_cbv
## class: DESeqDataSet 
## dim: 5124 6 
## metadata(1): version
## assays(4): counts mu H cooks
## rownames(5124): CAGL0H10626g CAGL0A00165g ... CAGL0M14069g CAGL0M14091g
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(6): CBS138_RPMI_VORI_A BG2_RPMI_VORI_A ... CBS138_RPMI_VORI_C
##   BG2_RPMI_VORI_C
## colData names(10): SampleID Code ... Condition sizeFactor
results(dds_compared_conditions_cbv)
## log2 fold change (MLE): Strain CBS138 vs BG2 
## Wald test p-value: Strain CBS138 vs BG2 
## DataFrame with 5124 rows and 6 columns
##               baseMean log2FoldChange     lfcSE       stat      pvalue
##              <numeric>      <numeric> <numeric>  <numeric>   <numeric>
## CAGL0H10626g   197.928      9.6176284  1.001505  9.6031748 7.75192e-22
## CAGL0A00165g   964.438      0.8215197  0.234334  3.5057649 4.55297e-04
## CAGL0A00187g  2374.245     -0.1850700  0.347642 -0.5323578 5.94478e-01
## CAGL0A00209g  5795.186      0.0759132  0.302842  0.2506697 8.02070e-01
## CAGL0A00231g   298.234     -0.0204449  0.444757 -0.0459686 9.63335e-01
## ...                ...            ...       ...        ...         ...
## CAGL0M14003g 12219.286       1.227250  0.315691    3.88750 0.000101281
## CAGL0M14025g 16734.160      -0.438702  0.215256   -2.03805 0.041545417
## CAGL0M14047g   309.375       1.437608  0.590298    2.43539 0.014875631
## CAGL0M14069g   142.758       0.738347  0.317943    2.32226 0.020218906
## CAGL0M14091g   846.220       1.013221  0.231136    4.38366 0.000011670
##                     padj
##                <numeric>
## CAGL0H10626g 2.20542e-19
## CAGL0A00165g 6.66165e-03
## CAGL0A00187g 7.89708e-01
## CAGL0A00209g 9.09735e-01
## CAGL0A00231g 9.86629e-01
## ...                  ...
## CAGL0M14003g 0.001845771
## CAGL0M14025g 0.174818472
## CAGL0M14047g 0.093013561
## CAGL0M14069g 0.113458022
## CAGL0M14091g 0.000312297
rN = resultsNames(dds_compared_conditions_cbv)
rN
## [1] "Intercept"            "Strain_CBS138_vs_BG2"
deseq_df_cbv <- deseq_df_transform(dds_compared_conditions_cbv, 
                               termstokeep = rN[2])
write.csv(deseq_df_cbv, "../data/deseq2/CompareStrains/CBS138_BG2_VORI_allDESeq.csv")

DEGdf_up2x_FDR5_cbv = get_upregulated_genes(deseq_df_cbv)
DEGdf_down2x_FDR5_cbv = get_downregulated_genes(deseq_df_cbv)
DEGdf_non_significant_cbv = get_non_significant_genes(deseq_df_cbv)

print(DEGdf_up2x_FDR5_cbv, n = 20)
## # A tibble: 187 × 5
##    gene         baseMean log2FC stderror     padj
##    <chr>           <dbl>  <dbl>    <dbl>    <dbl>
##  1 CAGL0E00187g  2272.    11.3     0.742 1.85e-49
##  2 CAGL0K00110g  4669.    10.9     0.773 1.24e-42
##  3 CAGL0H10626g   198.     9.62    1.00  2.21e-19
##  4 CAGL0E06666g  3341.     9.55    0.461 8.39e-92
##  5 CAGL0C00110g  2533.     9.24    0.472 4.53e-82
##  6 CAGL0K13024g  1352.     8.93    0.442 2.46e-87
##  7 CAGL0M00132g    60.8    6.19    0.776 2.94e-13
##  8 CAGL0H09922g    16.8    5.78    1.41  9.10e- 4
##  9 CAGL0I00220g    40.9    5.72    0.820 3.55e-10
## 10 CAGL0B00264g    92.1    5.64    1.08  7.65e- 6
## 11 CAGL0L05984g   207.     5.62    0.387 5.33e-45
## 12 CAGL0D06468g    30.4    4.52    1.21  3.24e- 3
## 13 CAGL0E00275g   127.     4.23    0.557 5.47e-12
## 14 CAGL0A01221g  3954.     4.18    0.476 3.22e-16
## 15 CAGL0B00990g  3957.     4.10    0.270 4.66e-49
## 16 CAGL0G10175g     7.10   4.05    1.46  4.64e- 2
## 17 CAGL0I09856g  1402.     3.96    0.319 6.71e-33
## 18 CAGL0D06534g     8.72   3.93    1.28  2.22e- 2
## 19 CAGL0G10219g    26.2    3.90    1.25  2.02e- 2
## 20 CAGL0G06798g   266.     3.90    0.478 7.27e-14
## # ℹ 167 more rows
print(DEGdf_down2x_FDR5_cbv, n = 20)
## # A tibble: 256 × 5
##    gene         baseMean log2FC stderror     padj
##    <chr>           <dbl>  <dbl>    <dbl>    <dbl>
##  1 CAGL0A02233g   7253.   -9.15    0.473 2.99e-80
##  2 CAGL0J11550g   8046.   -8.02    0.510 8.79e-53
##  3 CAGL0C00209g    137.   -5.71    0.577 1.52e-20
##  4 CAGL0M03305g    271.   -4.79    0.726 4.43e- 9
##  5 CAGL0K00407g   1811.   -4.41    0.294 3.75e-48
##  6 CAGL0G02937g     35.6  -4.39    0.970 1.76e- 4
##  7 CAGL0H04279g     22.9  -4.20    0.962 3.26e- 4
##  8 CAGL0E05984g   3078.   -4.15    0.780 5.12e- 6
##  9 CAGL0K10164g   7452.   -4.08    0.680 1.51e- 7
## 10 CAGL0H02563g   2350.   -4.07    0.629 9.58e- 9
## 11 CAGL0G01738g   4511.   -4.06    0.346 3.15e-29
## 12 CAGL0H10076g     37.2  -4.06    0.773 7.35e- 6
## 13 CAGL0B01232g   1831.   -3.72    0.710 7.47e- 6
## 14 CAGL0K08932g    528.   -3.53    0.806 3.12e- 4
## 15 CAGL0I02838g    810.   -3.51    0.659 5.08e- 6
## 16 CAGL0C03740g   1012.   -3.44    0.361 5.00e-19
## 17 CAGL0F05137g     71.4  -3.42    0.607 1.08e- 6
## 18 CAGL0F08261g   7875.   -3.29    0.439 9.80e-12
## 19 CAGL0K04565g     61.2  -3.27    0.939 7.07e- 3
## 20 CAGL0M03465g    218.   -3.26    0.705 1.13e- 4
## # ℹ 236 more rows
print(DEGdf_non_significant_cbv, n = 20)
## # A tibble: 4,156 × 5
##    gene         baseMean log2FC stderror   padj
##    <chr>           <dbl>  <dbl>    <dbl>  <dbl>
##  1 CAGL0C01617g     25.4 -0.999    0.755 0.425 
##  2 CAGL0G05764g    693.  -0.998    0.379 0.0619
##  3 CAGL0B02475g  14151.  -0.994    0.478 0.163 
##  4 CAGL0F04565g   2890.  -0.991    0.617 0.312 
##  5 CAGL0I01914g    255.  -0.990    0.825 0.481 
##  6 CAGL0L06446g    303.  -0.987    0.706 0.393 
##  7 CAGL0E04752g  10591.  -0.987    0.363 0.0514
##  8 CAGL0I05098g   3520.  -0.984    0.384 0.0725
##  9 CAGL0L07722g  45288.  -0.984    0.677 0.368 
## 10 CAGL0H07337g    122.  -0.980    0.892 0.531 
## 11 CAGL0K07007g    289.  -0.978    0.418 0.111 
## 12 CAGL0J11220g  37486.  -0.976    0.544 0.248 
## 13 CAGL0J01397g    291.  -0.970    0.665 0.366 
## 14 CAGL0L01397g    921.  -0.967    0.534 0.242 
## 15 CAGL0J01419g   2198.  -0.966    0.417 0.114 
## 16 CAGL0J01800g     18.7 -0.963    0.731 0.429 
## 17 CAGL0M04411g    134.  -0.961    0.476 0.179 
## 18 CAGL0I04202g    134.  -0.953    0.895 0.545 
## 19 CAGL0K11275g    793.  -0.953    0.546 0.264 
## 20 CAGL0M07029g   1186.  -0.953    0.479 0.187 
## # ℹ 4,136 more rows
p_cbv = volcano_plot(deseq_df_cbv, DEGdf_up2x_FDR5_cbv, DEGdf_down2x_FDR5_cbv,
                  xlabel = "log2 fold-change\n← up, BG2      up, CBS138 →") +
  ggtitle("Strain comparison in RPMI VCZ")
#p_cbv

print("Upregulated genes:")
## [1] "Upregulated genes:"
nrow(DEGdf_up2x_FDR5_cbv)
## [1] 187
print("Downregulated genes")
## [1] "Downregulated genes"
nrow(DEGdf_down2x_FDR5_cbv)
## [1] 256
dds_compared_conditions_cbf <-
  run_deseq_vsstrain(counts_all_joined,
                     conditions_to_compare = c("CBS138_RPMI_FLUC", "BG2_RPMI_FLUC"))
dds_compared_conditions_cbf
## class: DESeqDataSet 
## dim: 5124 6 
## metadata(1): version
## assays(4): counts mu H cooks
## rownames(5124): CAGL0H10626g CAGL0A00165g ... CAGL0M14069g CAGL0M14091g
## rowData names(22): baseMean baseVar ... deviance maxCooks
## colnames(6): CBS138_RPMI_FLUC_A BG2_RPMI_FLUC_A ... CBS138_RPMI_FLUC_C
##   BG2_RPMI_FLUC_C
## colData names(10): SampleID Code ... Condition sizeFactor
results(dds_compared_conditions_cbf)
## log2 fold change (MLE): Strain CBS138 vs BG2 
## Wald test p-value: Strain CBS138 vs BG2 
## DataFrame with 5124 rows and 6 columns
##               baseMean log2FoldChange     lfcSE      stat      pvalue
##              <numeric>      <numeric> <numeric> <numeric>   <numeric>
## CAGL0H10626g   259.989       7.689615  1.049868   7.32436 2.40033e-13
## CAGL0A00165g   995.133       0.489868  0.169866   2.88385 3.92852e-03
## CAGL0A00187g  2675.261      -0.525863  0.155620  -3.37915 7.27105e-04
## CAGL0A00209g  7023.453       0.274270  0.150235   1.82561 6.79097e-02
## CAGL0A00231g   371.162       0.281902  0.249171   1.13136 2.57903e-01
## ...                ...            ...       ...       ...         ...
## CAGL0M14003g 10105.213       0.696847  0.137365   5.07296 3.91666e-07
## CAGL0M14025g 17658.342      -0.535564  0.127272  -4.20803 2.57611e-05
## CAGL0M14047g   573.991       2.299193  0.230542   9.97299 2.00106e-23
## CAGL0M14069g   182.854       0.666365  0.291378   2.28695 2.21990e-02
## CAGL0M14091g   870.946       0.992741  0.164226   6.04498 1.49425e-09
##                     padj
##                <numeric>
## CAGL0H10626g 6.83160e-12
## CAGL0A00165g 1.50642e-02
## CAGL0A00187g 3.65909e-03
## CAGL0A00209g 1.47316e-01
## CAGL0A00231g 4.01141e-01
## ...                  ...
## CAGL0M14003g 4.32437e-06
## CAGL0M14025g 1.89003e-04
## CAGL0M14047g 1.28143e-21
## CAGL0M14069g 6.14732e-02
## CAGL0M14091g 2.46937e-08
rN = resultsNames(dds_compared_conditions_cbf)
rN
## [1] "Intercept"            "Strain_CBS138_vs_BG2"
deseq_df_cbf <- deseq_df_transform(dds_compared_conditions_cbf, 
                               termstokeep = rN[2])
write.csv(deseq_df_cbf, "../data/deseq2/CompareStrains/CBS138_BG2_FLUC_allDESeq.csv")

DEGdf_up2x_FDR5_cbf = get_upregulated_genes(deseq_df_cbf)
DEGdf_down2x_FDR5_cbf = get_downregulated_genes(deseq_df_cbf)
DEGdf_non_significant_cbf = get_non_significant_genes(deseq_df_cbf)

print(DEGdf_up2x_FDR5_cbf, n = 20)
## # A tibble: 184 × 5
##    gene         baseMean log2FC stderror      padj
##    <chr>           <dbl>  <dbl>    <dbl>     <dbl>
##  1 CAGL0K00110g  9811.    10.4     1.21  3.51e- 16
##  2 CAGL0E00187g  2002.    10.3     0.754 4.96e- 40
##  3 CAGL0E06666g  3643.     8.84    1.19  3.86e- 12
##  4 CAGL0K13024g  1368.     8.40    0.344 3.37e-128
##  5 CAGL0H10626g   260.     7.69    1.05  6.83e- 12
##  6 CAGL0C00110g  3068.     7.43    0.783 1.26e- 19
##  7 CAGL0B00264g   101.     7.02    0.723 1.44e- 20
##  8 CAGL0I00220g    34.4    6.05    0.944 2.98e-  9
##  9 CAGL0M00132g    91.8    5.76    0.568 2.85e- 22
## 10 CAGL0H06039g    70.9    4.96    0.553 1.49e- 17
## 11 CAGL0J11891g    12.4    4.32    1.15  1.02e-  3
## 12 CAGL0A01221g  5692.     4.30    0.146 5.26e-188
## 13 CAGL0E00275g   121.     4.23    0.393 5.05e- 25
## 14 CAGL0H09922g    14.0    4.21    1.05  3.98e-  4
## 15 CAGL0L06072g    51.9    4.03    0.616 1.34e-  9
## 16 CAGL0L05984g   227.     3.95    0.295 1.44e- 38
## 17 CAGL0F00209g    38.5    3.90    0.629 9.73e-  9
## 18 CAGL0D06534g     9.01   3.74    1.21  8.75e-  3
## 19 CAGL0E03894g   562.     3.66    0.259 6.89e- 43
## 20 CAGL0M00308g    67.6    3.60    0.493 7.72e- 12
## # ℹ 164 more rows
print(DEGdf_down2x_FDR5_cbf, n = 20)
## # A tibble: 242 × 5
##    gene         baseMean log2FC stderror      padj
##    <chr>           <dbl>  <dbl>    <dbl>     <dbl>
##  1 CAGL0A02233g   7338.   -8.53    1.01  1.26e- 15
##  2 CAGL0J11550g  10582.   -8.01    0.218 1.69e-290
##  3 CAGL0C00209g    161.   -5.70    0.392 1.73e- 45
##  4 CAGL0H04279g     15.0  -4.82    1.05  3.92e-  5
##  5 CAGL0H03135g     11.1  -4.28    1.03  2.40e-  4
##  6 CAGL0B01232g   1831.   -4.13    0.184 5.41e-108
##  7 CAGL0M03305g    362.   -4.06    0.307 9.26e- 38
##  8 CAGL0F06677g     17.7  -4.03    0.879 4.03e-  5
##  9 CAGL0K00407g   1931.   -3.93    0.465 1.16e- 15
## 10 CAGL0C03872g  15011.   -3.64    0.158 4.49e-115
## 11 CAGL0G01738g   4948.   -3.60    0.187 5.78e- 80
## 12 CAGL0C03740g   1086.   -3.46    0.193 5.54e- 69
## 13 CAGL0D02244g     22.5  -3.12    0.699 6.66e-  5
## 14 CAGL0H10076g     38.1  -3.09    0.531 8.91e-  8
## 15 CAGL0I02838g    865.   -3.08    0.189 1.77e- 57
## 16 CAGL0L06996g     80.8  -3.00    0.381 1.01e- 13
## 17 CAGL0B00220g     17.1  -2.96    0.945 7.76e-  3
## 18 CAGL0J03080g   7456.   -2.94    0.194 3.42e- 49
## 19 CAGL0F08261g  12221.   -2.93    0.183 5.88e- 55
## 20 CAGL0K10164g   9075.   -2.91    0.147 2.54e- 84
## # ℹ 222 more rows
print(DEGdf_non_significant_cbf, n = 20)
## # A tibble: 3,294 × 5
##    gene         baseMean log2FC stderror  padj
##    <chr>           <dbl>  <dbl>    <dbl> <dbl>
##  1 CAGL0K08712g     7.90 -0.999    1.02  0.474
##  2 CAGL0D01694g    11.7  -0.976    0.839 0.386
##  3 CAGL0F04521g    18.8  -0.975    0.629 0.228
##  4 CAGL0G09229g     3.51 -0.968    1.43  0.640
##  5 CAGL0I10450g     5.43 -0.964    1.17  0.555
##  6 CAGL0A01892g     1.31 -0.952    2.24  0.778
##  7 CAGL0A03806g     1.31 -0.952    2.24  0.778
##  8 CAGL0B00242g     1.31 -0.952    2.24  0.778
##  9 CAGL0C02783g     1.31 -0.952    2.24  0.778
## 10 CAGL0D01518g     1.31 -0.952    2.24  0.778
## 11 CAGL0D04400g     1.31 -0.952    2.24  0.778
## 12 CAGL0E03432g     1.31 -0.952    2.24  0.778
## 13 CAGL0G09647g     1.31 -0.952    2.24  0.778
## 14 CAGL0L01023g     1.31 -0.952    2.24  0.778
## 15 CAGL0L01045g     1.31 -0.952    2.24  0.778
## 16 CAGL0L07568g     1.31 -0.952    2.24  0.778
## 17 CAGL0M05115g    22.7  -0.950    0.587 0.206
## 18 CAGL0M12100g     8.30 -0.950    0.996 0.487
## 19 CAGL0B02651g    13.2  -0.938    0.888 0.436
## 20 CAGL0L12100g     9.51 -0.924    0.925 0.464
## # ℹ 3,274 more rows
p_cbf = volcano_plot(deseq_df_cbf, DEGdf_up2x_FDR5_cbf, DEGdf_down2x_FDR5_cbf,
                    xlabel = "log2 fold-change\n← up, BG2      up, CBS138 →") +
  ggtitle("Strain comparison in RPMI FCZ")
#p_cbf

print("Upregulated genes:")
## [1] "Upregulated genes:"
nrow(DEGdf_up2x_FDR5_cbf)
## [1] 184
print("Downregulated genes")
## [1] "Downregulated genes"
nrow(DEGdf_down2x_FDR5_cbf)
## [1] 242

Report number of differentially expressed genes in CBS138 vs BG2:

## [1] "CT upregulated: 194, downregulated: 265"
## [1] "FCZ upregulated: 184, downregulated: 242"
## [1] "VCZ upregulated: 187, downregulated: 256"

GO analysis of strain differences

CT upregulated in CBS138 (selected):

  • amino acid biosynthesis
  • cell adhesion
  • spore wall assembly
  • cell wall assembly

CT downregulated in CBS138 (selected):

  • trehalose metabolism
  • small molecule catabolism
  • lipid catabolism

FCZ upregulated in CBS138:

  • cell adhesion: AWP2, CAGL0A01584g, CAGL0I09856g, CAGL0K12078g, EPA23, EPA6

FCZ downregulated in CBS138 (selected):

  • small molecule catabolism
  • energy derivation by oxidation of organic compounds

VCZ upregulated in CBS138:

  • cell adhesion: AWP2, CAGL0A01584g, CAGL0I09856g, EPA23, EPA3, EPA6
  • filamentous growth: CAGL0H02145g, CAGL0I01254g, CAGL0I09856g, CAGL0K04169g, CAGL0M03663g, CAGL0M09042g, EFG1, SHO1, TEC1, YPS2

VCZ downregulated in CBS138:

  • small molecule catabolism
  • trehalose metabolism
  • carbohydrate metabolism
  • energy derivation by oxidation of organic compounds

Analysis of Drug vs no Drug and strain differences

The above conclusions for pairwise comparisons suggest that we should compare:

  • Drug vs no drug, taking average of VCZ and FCZ responses
  • and compare across strains to get the strain-specific drug interactions

Differential expression shared across both strains, both drugs

Here, “up” means that both drugs induces more transcript abundance in both strains. Essentially, treating both drugs as biological replicates, and both strains as biological replicates.

sample_sheet_all_drugnodrug <-
  sample_sheet_all %>%
  # select only relevant columns
  dplyr::select(SampleID, Code, Strain, Medium, Rep) %>%
  # make Strain a factor in predictable order 
  mutate(Strain = factor(Strain, 
                         levels = c("BG2", "CBS138"))
         ) %>%
  # create two new columns for Drug, and Voriconazole vs Fluconazole
  mutate(Drug = Medium %in% c("RPMI_FLUC", "RPMI_VORI"),
         VORI = Medium %in% c("RPMI_VORI")) %>%
  magrittr::set_rownames(.$Code)
## Warning: Setting row names on a tibble is deprecated.
dds_drugnodrug_bc <- 
  DESeqDataSetFromMatrix(
    countData = counts_all_joined %>%
      dplyr::select(sample_sheet_all_drugnodrug$Code) %>%
      magrittr::set_rownames(counts_all_joined$Gene),
    colData = sample_sheet_all_drugnodrug,
    design = ~ Strain + Drug) %>%
  DESeq()
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
resultsNames(dds_drugnodrug_bc)
## [1] "Intercept"            "Strain_CBS138_vs_BG2" "DrugTRUE"
deseq_df_drugnodrug_bc  <- 
  deseq_df_transform(dds_drugnodrug_bc,
                     termstokeep = "DrugTRUE")

deseq_df_drugnodrug_bc
## # A tibble: 5,124 × 5
##    gene         baseMean  log2FC stderror         padj
##    <chr>           <dbl>   <dbl>    <dbl>        <dbl>
##  1 CAGL0H10626g     220.  0.429    0.363  0.324       
##  2 CAGL0A00165g     939.  0.333    0.0975 0.00167     
##  3 CAGL0A00187g    2413.  0.358    0.136  0.0174      
##  4 CAGL0A00209g    6755. -0.0959   0.115  0.501       
##  5 CAGL0A00231g     515. -1.29     0.218  0.0000000196
##  6 CAGL0A00253g    2282.  0.0481   0.133  0.787       
##  7 CAGL0A00275g    2973. -0.196    0.140  0.234       
##  8 CAGL0A00297g    1698.  0.0577   0.115  0.699       
##  9 CAGL0A00319g    1804.  0.418    0.0855 0.00000436  
## 10 CAGL0A00341g     654.  0.883    0.205  0.0000584   
## # ℹ 5,114 more rows
DEGdf_up2x_FDR5_drug = get_upregulated_genes(deseq_df_drugnodrug_bc)
DEGdf_down2x_FDR5_drug = get_downregulated_genes(deseq_df_drugnodrug_bc)

print(DEGdf_up2x_FDR5_drug, n = 20)
## # A tibble: 258 × 5
##    gene         baseMean log2FC stderror      padj
##    <chr>           <dbl>  <dbl>    <dbl>     <dbl>
##  1 CAGL0A01089g   24754.   4.34    0.148 2.89e-185
##  2 CAGL0J03916g    8684.   3.88    0.169 1.56e-113
##  3 CAGL0L06094g    4710.   3.65    0.191 2.78e- 78
##  4 CAGL0F03641g    7427.   3.54    0.304 1.47e- 29
##  5 CAGL0K08668g    1429.   3.52    0.180 4.47e- 82
##  6 CAGL0D05940g   23664.   3.05    0.170 1.49e- 69
##  7 CAGL0M03839g   21921.   2.99    0.199 1.15e- 48
##  8 CAGL0B02838g    7089.   2.93    0.220 1.99e- 38
##  9 CAGL0F07029g    3902.   2.85    0.209 1.92e- 40
## 10 CAGL0B03663g    5540.   2.60    0.181 1.32e- 44
## 11 CAGL0M07403g    2557.   2.57    0.162 2.99e- 54
## 12 CAGL0M01760g   15392.   2.51    0.220 1.80e- 28
## 13 CAGL0D01474g    1672.   2.51    0.211 8.67e- 31
## 14 CAGL0C03872g   10721.   2.50    0.266 1.81e- 19
## 15 CAGL0K06677g     840.   2.45    0.195 2.91e- 34
## 16 CAGL0L06776g    1896.   2.44    0.206 1.96e- 30
## 17 CAGL0J04158g   13362.   2.43    0.202 1.85e- 31
## 18 CAGL0I05610g    6319.   2.40    0.205 9.53e- 30
## 19 CAGL0E05984g    2581.   2.38    0.479 3.07e-  6
## 20 CAGL0K10824g    1318.   2.36    0.193 3.33e- 32
## # ℹ 238 more rows
print(DEGdf_down2x_FDR5_drug, n = 20)
## # A tibble: 355 × 5
##    gene         baseMean log2FC stderror     padj
##    <chr>           <dbl>  <dbl>    <dbl>    <dbl>
##  1 CAGL0H10076g    136.   -3.43    0.287 4.70e-31
##  2 CAGL0E02255g   1670.   -3.08    0.346 1.02e-17
##  3 CAGL0H08712g    114.   -2.84    0.285 6.21e-22
##  4 CAGL0L10142g    132.   -2.80    0.281 8.07e-22
##  5 CAGL0M03305g    627.   -2.78    0.500 1.45e- 7
##  6 CAGL0L01551g   1556.   -2.69    0.198 4.96e-40
##  7 CAGL0J05390g    208.   -2.46    0.384 1.07e- 9
##  8 CAGL0L13156g    229.   -2.25    0.261 1.10e-16
##  9 CAGL0B00792g    252.   -2.21    0.239 4.93e-19
## 10 CAGL0C02211g    674.   -2.21    0.217 9.15e-23
## 11 CAGL0K06259g   3403.   -2.19    0.306 8.23e-12
## 12 CAGL0K00605g    272.   -2.14    0.221 8.31e-21
## 13 CAGL0G03927g     99.4  -2.03    0.225 4.15e-18
## 14 CAGL0I09592g    938.   -1.93    0.288 1.89e-10
## 15 CAGL0F01463g 108605.   -1.90    0.281 1.19e-10
## 16 CAGL0D04620g    213.   -1.89    0.307 5.18e- 9
## 17 CAGL0F05709g    138.   -1.88    0.322 3.33e- 8
## 18 CAGL0L10890g   2359.   -1.87    0.212 2.38e-17
## 19 CAGL0C01749g   1326.   -1.85    0.162 1.86e-28
## 20 CAGL0G05742g   1416.   -1.83    0.141 1.14e-36
## # ℹ 335 more rows
p_drugnodrug_bc = 
  volcano_plot(deseq_df_drugnodrug_bc, 
               DEGdf_up2x_FDR5_drug, 
               DEGdf_down2x_FDR5_drug,
               xlabel = "log2 fold-change\ndown, azole      up, azole\n←                        →") +
  ggtitle("Azole vs CT\n across both strains")
p_drugnodrug_bc
## Warning: Removed 16 rows containing missing values or values outside the scale range
## (`geom_point()`).

write.csv(deseq_df_drugnodrug_bc, "../data/deseq2/DrugBothStrains/BothStrains_Drug_vs_DMSO_allDESeq.csv")
write.csv(DEGdf_up2x_FDR5_drug, "../data/deseq2/DrugBothStrains/BothStrains_Drug_vs_DMSO_up2x_FDR5.csv")
writeLines(DEGdf_up2x_FDR5_drug$gene, "../data/deseq2/DrugBothStrains/BothStrains_Drug_vs_DMSO_up2x_FDR5_names.txt")
write.csv(DEGdf_down2x_FDR5_drug, "../data/deseq2/DrugBothStrains/BothStrains_Drug_vs_DMSO_down2x_FDR5.csv")
writeLines(DEGdf_down2x_FDR5_drug$gene, "../data/deseq2/DrugBothStrains/BothStrains_Drug_vs_DMSO_down2x_FDR5_names.txt")

Differential drug-strain interactions across strains

We’re looking for differential strain responses to drug. Here, “up” means that azoles induce more change in transcript abundance in CBS138 as compared to BG2. Again, treating both drugs as biological replicates, due to the small differences in response to the two diferent drugs.

dds_drugstrain <- 
  DESeqDataSetFromMatrix(
    countData = counts_all_joined %>%
      dplyr::select(sample_sheet_all_drugnodrug$Code) %>%
      magrittr::set_rownames(counts_all_joined$Gene),
    colData = sample_sheet_all_drugnodrug,
    design = ~ Strain * Drug) %>%
  DESeq()
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
resultsNames(dds_drugstrain)
## [1] "Intercept"             "Strain_CBS138_vs_BG2"  "DrugTRUE"             
## [4] "StrainCBS138.DrugTRUE"
deseq_df_drugstrain  <- 
  deseq_df_transform(dds_drugstrain,
                     termstokeep = "StrainCBS138.DrugTRUE")

deseq_df_drugstrain
## # A tibble: 5,124 × 5
##    gene         baseMean  log2FC stderror  padj
##    <chr>           <dbl>   <dbl>    <dbl> <dbl>
##  1 CAGL0H10626g     220. -0.916     1.18  0.986
##  2 CAGL0A00165g     939.  0.220     0.192 0.986
##  3 CAGL0A00187g    2413.  0.0531    0.279 0.991
##  4 CAGL0A00209g    6755.  0.0379    0.237 0.991
##  5 CAGL0A00231g     515.  0.865     0.399 0.546
##  6 CAGL0A00253g    2282.  0.0776    0.273 0.991
##  7 CAGL0A00275g    2973.  0.0260    0.288 0.993
##  8 CAGL0A00297g    1698.  0.321     0.222 0.899
##  9 CAGL0A00319g    1804.  0.322     0.152 0.583
## 10 CAGL0A00341g     654.  0.272     0.417 0.986
## # ℹ 5,114 more rows
DEGdf_up2x_FDR5_drugstrain = get_upregulated_genes(deseq_df_drugstrain)
DEGdf_down2x_FDR5_drugstrain = get_downregulated_genes(deseq_df_drugstrain)

print(DEGdf_up2x_FDR5_drugstrain, n = 20)
## # A tibble: 9 × 5
##   gene         baseMean log2FC stderror      padj
##   <chr>           <dbl>  <dbl>    <dbl>     <dbl>
## 1 CAGL0J02530g     466.   2.07    0.376 0.0000599
## 2 CAGL0L06776g    1896.   1.37    0.274 0.000471 
## 3 CAGL0I09856g    1624.   1.35    0.368 0.0246   
## 4 CAGL0K04719g    2367.   1.29    0.348 0.0238   
## 5 CAGL0M06347g    2086.   1.24    0.235 0.000134 
## 6 CAGL0L08426g    1163.   1.23    0.331 0.0233   
## 7 CAGL0K10824g    1318.   1.18    0.289 0.00899  
## 8 CAGL0M08800g    1528.   1.18    0.327 0.0290   
## 9 CAGL0M03839g   21921.   1.12    0.313 0.0325
print(DEGdf_down2x_FDR5_drugstrain, n = 20)
## # A tibble: 22 × 5
##    gene         baseMean log2FC stderror          padj
##    <chr>           <dbl>  <dbl>    <dbl>         <dbl>
##  1 CAGL0F06875g  11158.   -2.81    0.398 0.00000000921
##  2 CAGL0J06402g   2664.   -2.34    0.472 0.000508     
##  3 CAGL0M01892g     39.9  -1.90    0.510 0.0233       
##  4 CAGL0G02585g   8260.   -1.79    0.385 0.00170      
##  5 CAGL0F02431g   5599.   -1.77    0.394 0.00288      
##  6 CAGL0F04917g    114.   -1.73    0.441 0.0151       
##  7 CAGL0I09592g    938.   -1.72    0.450 0.0203       
##  8 CAGL0K07788g  14241.   -1.67    0.431 0.0167       
##  9 CAGL0C03872g  10721.   -1.67    0.399 0.00763      
## 10 CAGL0I08987g    949.   -1.47    0.402 0.0253       
## 11 CAGL0L02079g   7782.   -1.47    0.322 0.00224      
## 12 CAGL0K11616g   5367.   -1.44    0.340 0.00697      
## 13 CAGL0J09240g   9171.   -1.44    0.354 0.0100       
## 14 CAGL0E06688g    683.   -1.37    0.341 0.0107       
## 15 CAGL0C03443g  15756.   -1.34    0.297 0.00275      
## 16 CAGL0B01507g    734.   -1.27    0.233 0.0000599    
## 17 CAGL0G04807g   1090.   -1.21    0.321 0.0222       
## 18 CAGL0L01551g   1556.   -1.18    0.287 0.00899      
## 19 CAGL0I07711g    387.   -1.16    0.285 0.0100       
## 20 CAGL0G08668g   2336.   -1.14    0.263 0.00538      
## # ℹ 2 more rows
p_drugstrain = volcano_plot(deseq_df_drugstrain, DEGdf_up2x_FDR5_drugstrain, DEGdf_down2x_FDR5_drugstrain,
               xlabel = "log2 fold-change\nmore, BG2      more, CBS138\n←                        →") +
  ggtitle("Azole vs CT differences\nstrain BG2 vs CBS138")
p_drugstrain
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

write.csv(deseq_df_drugstrain, "../data/deseq2/DrugStrainInteractions/DrugStrain_CBS138_vs_BG2_allDESeq.csv")
write.csv(DEGdf_up2x_FDR5_drugstrain, "../data/deseq2/DrugStrainInteractions/DrugStrain_CBS138_vs_BG2_up2x_FDR5.csv")
writeLines(DEGdf_up2x_FDR5_drugstrain$gene, "../data/deseq2/DrugStrainInteractions/DrugStrain_CBS138_vs_BG2_up2x_FDR5_names.txt")
write.csv(DEGdf_down2x_FDR5_drugstrain, "../data/deseq2/DrugStrainInteractions/DrugStrain_CBS138_vs_BG2_down2x_FDR5.csv")
writeLines(DEGdf_down2x_FDR5_drugstrain$gene, "../data/deseq2/DrugStrainInteractions/DrugStrain_CBS138_vs_BG2_down2x_FDR5_names.txt")

Volcano plot drug-strain interactions with highlighted genes

Highlighting calcium transporters MID1, ECM7, CCH1.

# Warning! This code chunk is based on manually reading in a file with gene names only of calculated deseq genes. It would be better to do a merge on all Cg gene names.

calcium_transporters <-
  tribble(~gene, ~name,
          "CAGL0B02211g", "CCH1",
          "CAGL0M03597g", "MID1",
          "CAGL0M00748g", "ECM7")

favourite_TFs <- 
  tribble(~gene, ~name,
          "CAGL0M08800g", "YAP6",
          "CAGL0L06776g", "GAT3/4")

drugstrain_down_names <- 
  read_tsv("../data/deseq2/DrugStrainInteractions/DrugStrain_CBS138_vs_BG2_down2x_FDR5_fungidbtable.txt") %>%
  select(gene, name) %>%
  filter(!is.na(name))
## Rows: 22 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): gene, description, name
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
p_summary_volcanos_labels =
  plot_grid(
    p_cbd + 
      geom_label_repel(data = deseq_df_cbd %>%
                         right_join(calcium_transporters, by = "gene"),
                       aes(label = name), 
                       min.segment.length = 0, max.overlaps = 20,
                       box.padding = 0.1, label.padding = 0.1),
    p_drugnodrug_bc + 
      geom_label_repel(data = deseq_df_drugnodrug_bc %>%
                         right_join(calcium_transporters, by = "gene"),
                       aes(label = name), 
                       min.segment.length = 0, max.overlaps = 20,
                       box.padding = 0.1, label.padding = 0.1),
    p_drugstrain + 
      geom_label_repel(data = deseq_df_drugstrain %>%
                         right_join(
                           bind_rows(calcium_transporters, 
                                    favourite_TFs,
                                    drugstrain_down_names),
                           by = "gene"),
                       aes(label = name), 
                       min.segment.length = 0, max.overlaps = 20,
                       box.padding = 0.1, label.padding = 0.1),
    ncol = 1, nrow = 3)  +
    scale_y_continuous("-log10(p)",
                       limits = c(0,5), expand = c(0,0),
                       oob=scales::squish)
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning: Removed 16 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family 'arial' not found in PostScript font database
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
p_summary_volcanos_labels
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ggsave("../figures/summary_volcano_plots_genelabels.png", 
       plot = p_summary_volcanos_labels,
       width = 7.5, height = 9)

Venn Diagram of DEG lists

library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
# Generate 3 sets of 200 words
set1 <- paste(rep("word_" , 200) , sample(c(1:1000) , 200 , replace=F) , sep="")
set2 <- paste(rep("word_" , 200) , sample(c(1:1000) , 200 , replace=F) , sep="")
set3 <- paste(rep("word_" , 200) , sample(c(1:1000) , 200 , replace=F) , sep="")
 
set.seed(20190708)
genes <- paste("gene",1:1000,sep="")
x <- list(
  A = sample(genes,300), 
  B = sample(genes,525), 
  C = sample(genes,440),
  D = sample(genes,350)
  )

# Chart
venn.diagram(
  x = list(set1, set2, set3),
  category.names = c("Set 1" , "Set 2 " , "Set 3"),
  filename = NULL,
  output=TRUE
)
## (polygon[GRID.polygon.8483], polygon[GRID.polygon.8484], polygon[GRID.polygon.8485], polygon[GRID.polygon.8486], polygon[GRID.polygon.8487], polygon[GRID.polygon.8488], text[GRID.text.8489], text[GRID.text.8490], text[GRID.text.8491], text[GRID.text.8492], text[GRID.text.8493], text[GRID.text.8494], text[GRID.text.8495], text[GRID.text.8496], text[GRID.text.8497], text[GRID.text.8498])
display_venn <- function(x, ...){
  library(VennDiagram)
  grid.newpage()
  venn_object <- venn.diagram(x, filename = NULL, ...)
  grid.draw(venn_object)
}
display_venn(
  x,
  category.names = c("Set 1" , "Set 2 " , "Set 3", "Set 4"),
  fill = c("#999999", "#E69F00", "#56B4E9", "#009E73")
  )

display_venn(x)

library("ggVennDiagram")
## 
## Attaching package: 'ggVennDiagram'
## The following object is masked from 'package:tidyr':
## 
##     unite
ggVennDiagram(x, label_alpha = 0)

cf_up = as.vector(ortho_cb_c_up$gene)
cbd_up = as.vector(DEGdf_up2x_FDR5_cbd$gene)
cbf_up = as.vector(DEGdf_up2x_FDR5_cbf$gene)
bf_up = as.vector(ortho_cb_b_up$gene_id_cbs138)

y <- list(
  CBS138_FLUC = cf_up, 
  strain_DMSO = cbd_up, 
  strain_FLUC = cbf_up,
  BG2_FLUC = bf_up
  )

display_venn(
  y,
  fill = c("#999999", "#E69F00", "#56B4E9", "#009E73")
  )

print(head(DEGdf_up2x_FDR5_cbd))
## # A tibble: 6 × 5
##   gene         baseMean log2FC stderror      padj
##   <chr>           <dbl>  <dbl>    <dbl>     <dbl>
## 1 CAGL0K00110g   13966.  12.5     0.809 3.70e- 51
## 2 CAGL0C00110g    4327.  10.3     0.274 1.58e-303
## 3 CAGL0H10626g     203.   9.79    0.873 4.30e- 27
## 4 CAGL0E00187g     550.   8.81    0.816 3.86e- 25
## 5 CAGL0E06666g    2836.   8.68    0.823 6.27e- 24
## 6 CAGL0K13024g    1275.   8.65    0.334 9.32e-145
import pandas as pd

plt.rcParams['svg.fonttype'] = 'none'

rlog_all_p = r.rlog_all_df
rlog_all_p = rlog_all_p.reset_index().rename(columns={'index': 'Geneid'})


filamentous_genes = ["CAGL0H02145g",    "CAGL0I01254g", "CAGL0I09856g", "CAGL0K04169g", "CAGL0M03663g", "CAGL0M09042g"]
cell_adhesion_genes = ["CAGL0K00110g", "CAGL0A01584g", "CAGL0I09856g", "CAGL0K12078g", "CAGL0I00220g", "CAGL0E06688g", "CAGL0C00110g"]

s = rlog_all_p[rlog_all_p["Geneid"].isin(cell_adhesion_genes)]

t = s.set_index("Geneid")
#plt.figure(figsize=(10, 10))
#plt.subplots_adjust(left=0.5, right=1.5, bottom=4.0)
g = sns.clustermap(t, cmap='vlag', method="single", annot_kws={"size": 7})
plt.setp(g.ax_heatmap.get_yticklabels(), rotation=0)  # For y axis
plt.show()
#plt.savefig("/Users/s1964600/Work/random_figures/new_svg/cpac_scer_deg.svg")
plt.close()

Composite figures

Figure 3 - Azole drugs induce a consistent transcriptomic response, overlaid on strain-dependent baseline gene expression.

  1. Principal Component Analysis shows that RNA-seq samples cluster by strain and by azole drug treatment. Principal components 1 and 2 of the regularized logarithm of counts per gene are shown for all samples; see methods for details.
  2. Between-sample variance is concentrated in principal components 1 and 2, that panel a shows cluster by strain and azole treatment.
  3. There are extensive baseline gene expression differences between strains BG2 and CBS138.
  4. A consistent azole-dependent transcriptomic response is identified by pooling azole-dependent differential expression across both FCZ and VCZ drugs across both strains BG2 and CBS138.
  5. There are minimal strain-dependent differences in drug-induced gene expression, detected using the interaction term in a DESeq2 analysis with both factors (design = ~ Strain * Drug). See methods for details of differential gene expression analysis across 3 biological replicates using DESeq2, and supplementary figure SX for additional pairwise differential expression plots.

Load gene info

Load gene info from FungiDB with some of the important gene names.

CBS138_geneinfo <- 
  read_tsv("../data/genomes/CBS138_allgenes.txt",
           col_names = c("Geneid", "Transcriptid","GenomicLocation", "Description", "Name"),
           skip = 1,
           na = c("", "NA", "N/A"))
## Rows: 5615 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): Geneid, Transcriptid, GenomicLocation, Description, Name
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
BG2_geneinfo <- 
  read_tsv("../data/genomes/BG2_allgenes.txt",
           col_names = c("Geneid", "Transcriptid","GenomicLocation", "Description"),
           skip = 1,
           na = c("", "NA", "N/A")) %>%
  mutate(Name = stringr::str_extract(Description, "^[A-Z]+[0-9]+"))
## Rows: 5481 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): Geneid, Transcriptid, GenomicLocation, Description
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Plot groups of genes

Visualise drug-dependent DGE across strains

First make a data frame of:

  • deseq results drug vs no drug
  • for both drugs separately
  • in both strains separately
## # A tibble: 20,886 × 9
##    gene_id_cbs138 gene_id_bg2  Strain Drug  baseMean  log2FC stderror     padj
##    <chr>          <chr>        <chr>  <chr>    <dbl>   <dbl>    <dbl>    <dbl>
##  1 CAGL0A00165g   GWK60_A00055 CBS138 FCZ       837.  0.320     0.174 0.126   
##  2 CAGL0A00187g   GWK60_A00077 CBS138 FCZ      1592.  0.293     0.179 0.179   
##  3 CAGL0A00209g   GWK60_A00099 CBS138 FCZ      6112.  0.0175    0.178 0.946   
##  4 CAGL0A00231g   GWK60_A00121 CBS138 FCZ       428. -0.698     0.308 0.0543  
##  5 CAGL0A00253g   GWK60_A00143 CBS138 FCZ      1941.  0.213     0.173 0.330   
##  6 CAGL0A00275g   GWK60_A00165 CBS138 FCZ      2638. -0.106     0.151 0.603   
##  7 CAGL0A00297g   GWK60_A00187 CBS138 FCZ      1434.  0.385     0.176 0.0642  
##  8 CAGL0A00319g   GWK60_A00209 CBS138 FCZ      1575.  0.557     0.178 0.00633 
##  9 CAGL0A00341g   GWK60_A00231 CBS138 FCZ       269.  1.26      0.317 0.000354
## 10 CAGL0A00363g   GWK60_A00253 CBS138 FCZ     10119. -0.330     0.285 0.361   
## # ℹ 20,876 more rows
## # ℹ 1 more variable: Strain_Drug <fct>
gene_list_3groups <- 
  read_excel("../data/gene_lists/Gene lists Ribeiro et al 2025.xlsx",
             sheet = 2) %>%
  mutate(Group = as_factor(Group), 
         gene_name = coalesce(gene_name, gene_id_cbs138) %>%
           as_factor() %>% fct_rev() )
gene_list_3groups
## # A tibble: 38 × 4
##    Group            gene_id_cbs138 gene_name notes                           
##    <fct>            <chr>          <fct>     <chr>                           
##  1 azole specific   CAGL0A00451g   PDR1      Multi-drug transporter          
##  2 azole specific   CAGL0M01760g   CDR1      Multi-drug transporter          
##  3 azole specific   CAGL0F02717g   PDH1      Multi-drug transporter, aka CDR2
##  4 azole specific   CAGL0I04862g   SNQ2      <NA>                            
##  5 azole specific   CAGL0F01793g   ERG3      <NA>                            
##  6 azole specific   CAGL0M07656g   ERG5      <NA>                            
##  7 azole specific   CAGL0H04653g   ERG6      <NA>                            
##  8 azole specific   CAGL0E04334g   ERG11     <NA>                            
##  9 azole specific   CAGL0L00319g   ERG20     <NA>                            
## 10 virulence factor CAGL0M04191g   YPS1      Aspartyl protease               
## # ℹ 28 more rows
deseq_4x_goi <- 
  left_join(gene_list_3groups, deseq_df_4x,
            by = "gene_id_cbs138",
            )

genelist_heatmap <- 
  ggplot(data = deseq_4x_goi) +
  geom_tile(aes(x = Strain_Drug, y = gene_name, fill = log2FC)) +
  scale_fill_gradient2() +
  facet_grid(Group ~ ., drop = TRUE, scales = "free_y", space = "free_y") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

genelist_heatmap

genelist_dotmap <- 
  ggplot(data = deseq_4x_goi) +
  geom_point(aes(x = Strain_Drug, y = gene_name, fill = log2FC),
             colour = "black", size = 3, shape = 21) +
  scale_fill_gradient2() +
  facet_grid(Group ~ ., drop = TRUE, scales = "free_y", space = "free_y") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

genelist_dotmap

TO DOs: